manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1626228 [3/10] - in /manifoldcf/branches/dev_1x: ./ connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/ connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/ conne...
Date Fri, 19 Sep 2014 14:22:28 GMT
Modified: manifoldcf/branches/dev_1x/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java Fri Sep 19 14:22:27 2014
@@ -434,66 +434,27 @@ public class EmailConnector extends org.
     }
   }
 
-  /**
-  * Get document versions given an array of document identifiers.
-  * This method is called for EVERY document that is considered. It is therefore important to perform
-  * as little work as possible here.
-  * The connector will be connected before this method can be called.
-  *
-  * @param documentIdentifiers is the array of local document identifiers, as understood by this connector.
-  * @param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
-  * A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
-  * had an empty version string.
-  * @param activities is the interface this method should use to perform whatever framework actions are desired.
-  * @param spec is the current document specification for the current job. If there is a dependency on this
-  * specification, then the version string should include the pertinent data, so that reingestion will occur
-  * when the specification changes. This is primarily useful for metadata.
-  * @param jobMode is an integer describing how the job is being run, whether continuous or once-only.
-  * @param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
-  * @return the corresponding version strings, with null in the places where the document no longer exists.
-  * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
-  * will always be processed.
-  */
-  @Override
-  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
-    DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
-    throws ManifoldCFException, ServiceInterruption {
-
-    String[] result = new String[documentIdentifiers.length];
-    for (int i = 0; i < documentIdentifiers.length; i++)
-    {
-      result[i] = "_" + urlTemplate;   // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
-    }
-    return result;
-
-  }
-
-  /**
-  * Process a set of documents.
+  /** Process a set of documents.
   * This is the method that should cause each document to be fetched, processed, and the results either added
   * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
   * The document specification allows this class to filter what is done based on the job.
   * The connector will be connected before this method can be called.
-  *
-  * @param documentIdentifiers is the set of document identifiers to process.
-  * @param versions is the corresponding document versions to process, as returned by getDocumentVersions() above.
-  * The implementation may choose to ignore this parameter and always process the current version.
-  * @param activities is the interface this method should use to queue up new document references
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param statuses are the currently-stored document versions for each document in the set of document identifiers
+  * passed in above.
+  *@param activities is the interface this method should use to queue up new document references
   * and ingest documents.
-  * @param spec is the document specification.
-  * @param scanOnly is an array corresponding to the document identifiers. It is set to true to indicate when the processing
-  * should only find other references, and should not actually call the ingestion methods.
-  * @param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
   */
   @Override
-  public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
-    DocumentSpecification spec, boolean[] scanOnly, int jobMode)
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption {
-    getSession();
-    int i = 0;
+
     List<String> requiredMetadata = new ArrayList<String>();
-    while (i < spec.getChildCount()) {
-      SpecificationNode sn = spec.getChild(i++);
+    for (int i = 0; i < spec.getChildCount(); i++) {
+      SpecificationNode sn = spec.getChild(i);
       if (sn.getType().equals(EmailConfig.NODE_METADATA)) {
         String metadataAttribute = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME);
         requiredMetadata.add(metadataAttribute);
@@ -503,124 +464,130 @@ public class EmailConnector extends org.
     // Keep a cached set of open folders
     Map<String,Folder> openFolders = new HashMap<String,Folder>();
     try {
-      i = 0;
-      while (i < documentIdentifiers.length) {
-        String compositeID = documentIdentifiers[i];
-        String version = versions[i];
-        String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
-        String id = extractEmailIDFromDocumentIdentifier(compositeID);
-        try {
-          Folder folder = openFolders.get(folderName);
-          if (folder == null)
-          {
-            OpenFolderThread oft = new OpenFolderThread(session, folderName);
-            oft.start();
-            folder = oft.finishUp();
-            openFolders.put(folderName,folder);
-          }
-          
-          long startTime = System.currentTimeMillis();
-          InputStream is = null;
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("Email: Processing document identifier '"
-              + compositeID + "'");
-          SearchTerm messageIDTerm = new MessageIDTerm(id);
-          
-          SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
-          smt.start();
-          Message[] message = smt.finishUp();
-
-          for (Message msg : message) {
-            RepositoryDocument rd = new RepositoryDocument();
-            Date setDate = msg.getSentDate();
-            rd.setFileName(msg.getFileName());
-            is = msg.getInputStream();
-            rd.setBinary(is, msg.getSize());
-            String subject = StringUtils.EMPTY;
-            for (String metadata : requiredMetadata) {
-              if (metadata.toLowerCase().equals(EmailConfig.EMAIL_TO)) {
-                Address[] to = msg.getRecipients(Message.RecipientType.TO);
-                String[] toStr = new String[to.length];
-                int j = 0;
-                for (Address address : to) {
-                  toStr[j] = address.toString();
-                }
-                rd.addField(EmailConfig.EMAIL_TO, toStr);
-              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_FROM)) {
-                Address[] from = msg.getFrom();
-                String[] fromStr = new String[from.length];
-                int j = 0;
-                for (Address address : from) {
-                  fromStr[j] = address.toString();
-                }
-                rd.addField(EmailConfig.EMAIL_TO, fromStr);
 
-              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_SUBJECT)) {
-                subject = msg.getSubject();
-                rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
-              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_BODY)) {
-                Multipart mp = (Multipart) msg.getContent();
-                for (int k = 0, n = mp.getCount(); k < n; k++) {
-                  Part part = mp.getBodyPart(k);
-                  String disposition = part.getDisposition();
-                  if ((disposition == null)) {
-                    MimeBodyPart mbp = (MimeBodyPart) part;
-                    if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
-                      rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
-                    } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
-                      rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
+      for (String documentIdentifier : documentIdentifiers) {
+        String versionString = "_" + urlTemplate;   // NOT empty; we need to make ManifoldCF understand that this is a document that never will change.
+        
+        // Check if we need to index
+        if (activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+        {
+          String compositeID = documentIdentifier;
+          String version = versionString;
+          String folderName = extractFolderNameFromDocumentIdentifier(compositeID);
+          String id = extractEmailIDFromDocumentIdentifier(compositeID);
+          try {
+            Folder folder = openFolders.get(folderName);
+            if (folder == null)
+            {
+              getSession();
+              OpenFolderThread oft = new OpenFolderThread(session, folderName);
+              oft.start();
+              folder = oft.finishUp();
+              openFolders.put(folderName,folder);
+            }
+            
+            long startTime = System.currentTimeMillis();
+            InputStream is = null;
+            if (Logging.connectors.isDebugEnabled())
+              Logging.connectors.debug("Email: Processing document identifier '"
+                + compositeID + "'");
+            SearchTerm messageIDTerm = new MessageIDTerm(id);
+            
+            getSession();
+            SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm);
+            smt.start();
+            Message[] message = smt.finishUp();
+
+            for (Message msg : message) {
+              RepositoryDocument rd = new RepositoryDocument();
+              Date setDate = msg.getSentDate();
+              rd.setFileName(msg.getFileName());
+              is = msg.getInputStream();
+              rd.setBinary(is, msg.getSize());
+              String subject = StringUtils.EMPTY;
+              for (String metadata : requiredMetadata) {
+                if (metadata.toLowerCase().equals(EmailConfig.EMAIL_TO)) {
+                  Address[] to = msg.getRecipients(Message.RecipientType.TO);
+                  String[] toStr = new String[to.length];
+                  int j = 0;
+                  for (Address address : to) {
+                    toStr[j] = address.toString();
+                  }
+                  rd.addField(EmailConfig.EMAIL_TO, toStr);
+                } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_FROM)) {
+                  Address[] from = msg.getFrom();
+                  String[] fromStr = new String[from.length];
+                  int j = 0;
+                  for (Address address : from) {
+                    fromStr[j] = address.toString();
+                  }
+                  rd.addField(EmailConfig.EMAIL_TO, fromStr);
+
+                } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_SUBJECT)) {
+                  subject = msg.getSubject();
+                  rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
+                } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_BODY)) {
+                  Multipart mp = (Multipart) msg.getContent();
+                  for (int k = 0, n = mp.getCount(); k < n; k++) {
+                    Part part = mp.getBodyPart(k);
+                    String disposition = part.getDisposition();
+                    if ((disposition == null)) {
+                      MimeBodyPart mbp = (MimeBodyPart) part;
+                      if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
+                        rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
+                      } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
+                        rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
+                      }
                     }
                   }
-                }
-              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_DATE)) {
-                Date sentDate = msg.getSentDate();
-                rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
-              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
-                Multipart mp = (Multipart) msg.getContent();
-                if (mp != null) {
-                  String[] encoding = new String[mp.getCount()];
+                } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_DATE)) {
+                  Date sentDate = msg.getSentDate();
+                  rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
+                } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
+                  Multipart mp = (Multipart) msg.getContent();
+                  if (mp != null) {
+                    String[] encoding = new String[mp.getCount()];
+                    for (int k = 0, n = mp.getCount(); k < n; k++) {
+                      Part part = mp.getBodyPart(k);
+                      String disposition = part.getDisposition();
+                      if ((disposition != null) &&
+                          ((disposition.equals(Part.ATTACHMENT) ||
+                              (disposition.equals(Part.INLINE))))) {
+                        encoding[k] = part.getFileName().split("\\?")[1];
+
+                      }
+                    }
+                    rd.addField(EmailConfig.ENCODING_FIELD, encoding);
+                  }
+                } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
+                  Multipart mp = (Multipart) msg.getContent();
+                  String[] MIMEType = new String[mp.getCount()];
                   for (int k = 0, n = mp.getCount(); k < n; k++) {
                     Part part = mp.getBodyPart(k);
                     String disposition = part.getDisposition();
                     if ((disposition != null) &&
                         ((disposition.equals(Part.ATTACHMENT) ||
                             (disposition.equals(Part.INLINE))))) {
-                      encoding[k] = part.getFileName().split("\\?")[1];
+                      MIMEType[k] = part.getContentType();
 
                     }
                   }
-                  rd.addField(EmailConfig.ENCODING_FIELD, encoding);
-                }
-              } else if (metadata.toLowerCase().equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) {
-                Multipart mp = (Multipart) msg.getContent();
-                String[] MIMEType = new String[mp.getCount()];
-                for (int k = 0, n = mp.getCount(); k < n; k++) {
-                  Part part = mp.getBodyPart(k);
-                  String disposition = part.getDisposition();
-                  if ((disposition != null) &&
-                      ((disposition.equals(Part.ATTACHMENT) ||
-                          (disposition.equals(Part.INLINE))))) {
-                    MIMEType[k] = part.getContentType();
-
-                  }
+                  rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
                 }
-                rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType);
               }
-            }
-            String documentURI = makeDocumentURI(urlTemplate, folderName, id);
-            activities.ingestDocumentWithException(id, version, documentURI, rd);
+              String documentURI = makeDocumentURI(urlTemplate, folderName, id);
+              activities.ingestDocumentWithException(id, version, documentURI, rd);
 
+            }
+          } catch (InterruptedException e) {
+            throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
+          } catch (MessagingException e) {
+            handleMessagingException(e, "processing email");
+          } catch (IOException e) {
+            handleIOException(e, "processing email");
+            throw new ManifoldCFException(e.getMessage(), e);
           }
-        } catch (InterruptedException e) {
-          throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
-        } catch (MessagingException e) {
-          handleMessagingException(e, "processing email");
-        } catch (IOException e) {
-          handleIOException(e, "processing email");
-          throw new ManifoldCFException(e.getMessage(), e);
         }
-        
-        i++;
       }
     }
     finally
@@ -643,6 +610,7 @@ public class EmailConnector extends org.
         }
       }
     }
+
   }
 
   //////////////////////////////End of Repository Connector Methods///////////////////////////////////

Modified: manifoldcf/branches/dev_1x/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java Fri Sep 19 14:22:27 2014
@@ -133,9 +133,28 @@ public class FilenetConnector extends or
       }
     }
 
-    public Throwable getException()
+    public void finishUp()
+      throws java.net.MalformedURLException, NotBoundException, RemoteException, FilenetException, InterruptedException
     {
-      return exception;
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof java.net.MalformedURLException)
+          throw (java.net.MalformedURLException)thr;
+        else if (thr instanceof NotBoundException)
+          throw (NotBoundException)thr;
+        else if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
     }
   }
 
@@ -184,26 +203,10 @@ public class FilenetConnector extends or
 
       long currentTime;
       GetSessionThread t = new GetSessionThread();
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof java.net.MalformedURLException)
-            throw (java.net.MalformedURLException)thr;
-          else if (thr instanceof NotBoundException)
-            throw (NotBoundException)thr;
-          else if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
+        t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -272,9 +275,24 @@ public class FilenetConnector extends or
       }
     }
 
-    public Throwable getException()
+    public void finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
-      return exception;
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
     }
 
   }
@@ -291,22 +309,10 @@ public class FilenetConnector extends or
     if (currentTime >= lastSessionFetch + timeToRelease)
     {
       DestroySessionThread t = new DestroySessionThread();
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
+        t.finishUp();
         session = null;
         lastSessionFetch = -1L;
       }
@@ -492,22 +498,10 @@ public class FilenetConnector extends or
     if (session != null)
     {
       DestroySessionThread t = new DestroySessionThread();
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
+        t.finishUp();
         session = null;
         lastSessionFetch = -1L;
       }
@@ -891,90 +885,47 @@ public class FilenetConnector extends or
     return value - digit * divisor;
   }
 
-  /** Get document versions given an array of document identifiers.
-  * This method is called for EVERY document that is considered. It is
-  * therefore important to perform as little work as possible here.
-  *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
-  *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
-  *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
-  *   had an empty version string.
-  *@param activity is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is the current document specification for the current job.  If there is a dependency on this
-  * specification, then the version string should include the pertinent data, so that reingestion will occur
-  * when the specification changes.  This is primarily useful for metadata.
+  /** Process a set of documents.
+  * This is the method that should cause each document to be fetched, processed, and the results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
+  * The document specification allows this class to filter what is done based on the job.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param statuses are the currently-stored document versions for each document in the set of document identifiers
+  * passed in above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
-  *@return the corresponding version strings, with null in the places where the document no longer exists.
-  * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
-  * will always be processed.
   */
   @Override
-  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activity,
-    DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption
   {
-    Logging.connectors.debug("FileNet: Inside getDocumentVersions");
-
-    String[] acls = getAcls(spec);
-
-    String[] rval = new String[documentIdentifiers.length];
+    Logging.connectors.debug("FileNet: Inside processDocuments");
 
-    // Put together a set of the metadata fields, from the document specification
-    int i = 0;
-    HashMap docClassSpecs = new HashMap();
-    // Also calculate the fields we will need to retrieve from each document, on a document class basis
-    HashMap metadataFields = new HashMap();
-    while (i < spec.getChildCount())
-    {
-      SpecificationNode sn = spec.getChild(i++);
-      if (sn.getType().equals(SPEC_NODE_DOCUMENTCLASS))
-      {
-        String value = sn.getAttributeValue(SPEC_ATTRIBUTE_VALUE);
-        DocClassSpec classSpec = new DocClassSpec(sn);
-        docClassSpecs.put(value,classSpec);
-        if (classSpec.getAllMetadata())
-          metadataFields.put(value,new Boolean(true));
-        else
-        {
-          HashMap sumMap = new HashMap();
-          int j = 0;
-          String[] fields = classSpec.getMetadataFields();
-          while (j < fields.length)
-          {
-            String field = fields[j++];
-            sumMap.put(field,field);
-          }
-          j = 0;
-          while (j < classSpec.getMatchCount())
-          {
-            String field = classSpec.getMatchField(j++);
-            sumMap.put(field,field);
-          }
-          // Convert to an array
-          String[] fieldArray = new String[sumMap.size()];
-          Iterator iter = sumMap.keySet().iterator();
-          j = 0;
-          while (iter.hasNext())
-          {
-            fieldArray[j++] = (String)iter.next();
-          }
-          metadataFields.put(value,fieldArray);
-        }
-      }
-    }
+    SpecInfo dSpec = new SpecInfo(spec);
+    
+    String[] acls = dSpec.getAcls();
 
 
-    for (i=0; i<documentIdentifiers.length; i++)
+    for (String documentIdentifier : documentIdentifiers)
     {
       // For each document, be sure to confirm job still active
-      activity.checkJobStillActive();
-
-      String documentIdentifier = documentIdentifiers[i];
+      activities.checkJobStillActive();
 
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("Filenet: Getting version for identifier '"+documentIdentifier+"'");
 
       // Calculate the version id and the element number
+      String versionString;
+      String[] aclValues = null;
+      String[] denyAclValues = null;
+      String docClass = null;
+      String[] metadataFieldNames = null;
+      String[] metadataFieldValues = null;
+      
       int cIndex = documentIdentifier.indexOf(",");
       if (cIndex != -1)
       {
@@ -983,12 +934,13 @@ public class FilenetConnector extends or
         long currentTime;
         try
         {
-          FileInfo fileInfo = doGetDocumentInformation(vId, metadataFields);
+          FileInfo fileInfo = doGetDocumentInformation(vId, dSpec.getMetadataFields());
           if (fileInfo == null)
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("FileNet: Skipping document '"+documentIdentifier+"' because not a current document");
-            rval[i] = null;
+            activities.deleteDocument(documentIdentifier);
+            continue;
           }
           else
           {
@@ -1002,8 +954,8 @@ public class FilenetConnector extends or
             // (c) the url prefix to use
             StringBuilder versionBuffer = new StringBuilder();
 
-            String docClass = fileInfo.getDocClass();
-            DocClassSpec docclassspec = (DocClassSpec)docClassSpecs.get(docClass);
+            docClass = fileInfo.getDocClass();
+            DocClassSpec docclassspec = dSpec.getDocClassSpec(docClass);
 
             // First, verify that this document matches the match criteria
             boolean docMatches = true;
@@ -1033,7 +985,7 @@ public class FilenetConnector extends or
                 if (docclassspec.checkMetadataIncluded(field))
                   metadataCount++;
               }
-              String[] metadataFieldNames = new String[metadataCount];
+              metadataFieldNames = new String[metadataCount];
               int j = 0;
               iter = fileInfo.getMetadataIterator();
               while (iter.hasNext())
@@ -1046,7 +998,7 @@ public class FilenetConnector extends or
               // Pack field names and values
               // For sanity, pack the names first and then the values!
               packList(versionBuffer,metadataFieldNames,'+');
-              String[] metadataFieldValues = new String[metadataFieldNames.length];
+              metadataFieldValues = new String[metadataFieldNames.length];
               j = 0;
               while (j < metadataFieldValues.length)
               {
@@ -1059,8 +1011,6 @@ public class FilenetConnector extends or
 
               // Acl info
               // Future work will add "forced acls", so use a single character as a signal as to whether security is on or off.
-              String[] aclValues = null;
-              String[] denyAclValues = null;
               if (acls != null && acls.length == 0)
               {
                 // Security is on, so use the acls that came back from filenet
@@ -1104,13 +1054,14 @@ public class FilenetConnector extends or
               // Document URI
               pack(versionBuffer,docURIPrefix,'+');
 
-              rval[i] = versionBuffer.toString();
+              versionString = versionBuffer.toString();
             }
             else
             {
               if (Logging.connectors.isDebugEnabled())
                 Logging.connectors.debug("FileNet: Skipping document '"+documentIdentifier+"' because doesn't match field criteria");
-              rval[i] = null;
+              activities.deleteDocument(documentIdentifier);
+              continue;
             }
           }
         }
@@ -1124,8 +1075,8 @@ public class FilenetConnector extends or
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("FileNet: Skipping file '"+documentIdentifier+"' because: "+e.getMessage(),e);
-            rval[i] = null;
-
+            activities.deleteDocument(documentIdentifier);
+            continue;
           }
           else
             throw new ManifoldCFException(e.getMessage(),e);
@@ -1136,73 +1087,25 @@ public class FilenetConnector extends or
         // This is a naked version identifier.
         // On every crawl, we need to convert this identifier to the individual identifiers for each bit of content.
         // There is no versioning available for this process.
-        rval[i] = "";
+        versionString = "";
       }
-    }
-    return rval;
-  }
-
-  /** Emulate the query matching for filenet sql expressions. */
-  protected static boolean performMatch(String matchType, String matchDocValue, String matchValue)
-  {
-    if (matchType.equals("="))
-      return matchDocValue.equalsIgnoreCase(matchValue);
-    else if (matchType.equals("!="))
-      return !matchDocValue.equalsIgnoreCase(matchValue);
-
-    // Do a LIKE comparison
-    return likeMatch(matchDocValue,0,matchValue,0);
-  }
-
-  /** Match a portion of a string with SQL wildcards (%) */
-  protected static boolean likeMatch(String matchDocValue, int matchDocPos, String matchValue, int matchPos)
-  {
-    if (matchPos == matchValue.length())
-    {
-      return matchDocPos == matchDocValue.length();
-    }
-    if (matchDocPos == matchDocValue.length())
-    {
-      return matchValue.charAt(matchPos) == '%' && likeMatch(matchDocValue,matchDocPos,matchValue,matchPos+1);
-    }
-    char x = matchDocValue.charAt(matchDocPos);
-    char y = matchValue.charAt(matchPos);
-    if (y != '%')
-      return Character.toLowerCase(x) == Character.toLowerCase(y) && likeMatch(matchDocValue,matchDocPos+1,matchValue,matchPos+1);
-
-    return likeMatch(matchDocValue,matchDocPos+1,matchValue,matchPos) ||
-      likeMatch(matchDocValue,matchDocPos,matchValue,matchPos+1);
-  }
-
-  /** Process documents whose versions indicate they need processing.
-  */
-  public void processDocuments(String[] documentIdentifiers, String[] documentVersions,
-    IProcessActivity activities, DocumentSpecification spec, boolean[] scanOnly)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    Logging.connectors.debug("FileNet: Inside processDocuments");
-
-    int i = 0;
-    while (i < documentIdentifiers.length)
-    {
-      // For each document, be sure to confirm job still active
-      activities.checkJobStillActive();
-
-      String documentIdentifier = documentIdentifiers[i];
-      String documentVersion = documentVersions[i];
+      
+      if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
+      {
+        // For each document, be sure to confirm job still active
+        activities.checkJobStillActive();
 
-      if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("FileNet: Processing document identifier '"+documentIdentifier+"'");
+        String documentVersion = versionString;
 
-      // Calculate the version id and the element number
-      int cIndex = documentIdentifier.indexOf(",");
-      if (cIndex != -1)
-      {
         if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("FileNet: Document identifier '"+documentIdentifier+"' is a document attachment");
+          Logging.connectors.debug("FileNet: Processing document identifier '"+documentIdentifier+"'");
 
-        if (!scanOnly[i])
+        // Calculate the version id and the element number
+        if (cIndex != -1)
         {
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("FileNet: Document identifier '"+documentIdentifier+"' is a document attachment");
+
           String vId = documentIdentifier.substring(0,cIndex);
           int elementNumber;
           try
@@ -1214,30 +1117,6 @@ public class FilenetConnector extends or
             throw new ManifoldCFException("Bad number in identifier: "+documentIdentifier,e);
           }
 
-          // Unpack the information in the document version
-          ArrayList metadataNames = new ArrayList();
-          ArrayList metadataValues = new ArrayList();
-          ArrayList aclValues = null;
-          ArrayList denyAclValues = null;
-          StringBuilder documentClass = new StringBuilder();
-          StringBuilder urlBase = new StringBuilder();
-          int position = 0;
-          position = unpackList(metadataNames, documentVersion, position, '+');
-          position = unpackList(metadataValues, documentVersion, position, '+');
-          //Logging.connectors.debug("Names length = "+Integer.toString(metadataNames.size()));
-          //Logging.connectors.debug("Values length = "+Integer.toString(metadataValues.size()));
-          if (documentVersion.length() > position && documentVersion.charAt(position++) == '+')
-          {
-            //Logging.connectors.debug("Acls found at position "+Integer.toString(position));
-            aclValues = new ArrayList();
-            position = unpackList(aclValues, documentVersion, position, '+');
-            denyAclValues = new ArrayList();
-            position = unpackList(denyAclValues, documentVersion, position, '+');
-            //Logging.connectors.debug("ACLs length = "+Integer.toString(aclValues.size()));
-          }
-          position = unpack(documentClass, documentVersion, position, '+');
-          position = unpack(urlBase, documentVersion, position, '+');
-
           //Logging.connectors.debug("Url base from version string = "+urlBase.toString());
           try
           {
@@ -1268,7 +1147,6 @@ public class FilenetConnector extends or
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("FileNet: Removing file '"+documentIdentifier+"' because: "+e.getMessage(),e);
                   activities.noDocument(documentIdentifier,documentVersion);
-                  i++;
                   continue;
                 }
                 else
@@ -1295,42 +1173,26 @@ public class FilenetConnector extends or
                   rd.setBinary(is, fileLength);
 
                   // Apply metadata
-                  int j = 0;
-                  while (j < metadataNames.size())
+                  for (int j = 0; j < metadataFieldNames.length; j++)
                   {
-                    String metadataName = (String)metadataNames.get(j);
-                    String metadataValue = (String)metadataValues.get(j);
+                    String metadataName = metadataFieldNames[j];
+                    String metadataValue = metadataFieldValues[j];
                     rd.addField(metadataName,metadataValue);
-                    j++;
                   }
 
                   // Apply acls
                   if (aclValues != null)
                   {
-                    String[] acls = new String[aclValues.size()];
-                    j = 0;
-                    while (j < aclValues.size())
-                    {
-                      acls[j] = (String)aclValues.get(j);
-                      j++;
-                    }
-                    rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,acls);
+                    rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclValues);
                   }
                   if (denyAclValues != null)
                   {
-                    String[] denyAcls = new String[denyAclValues.size()];
-                    j = 0;
-                    while (j < denyAclValues.size())
-                    {
-                      denyAcls[j] = (String)denyAclValues.get(j);
-                      j++;
-                    }
-                    rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAcls);
+                    rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAclValues);
                   }
 
                   // Ingest
                   activities.ingestDocumentWithException(documentIdentifier,documentVersion,
-                    convertToURI(urlBase.toString(),vId,elementNumber,documentClass.toString()),rd);
+                    convertToURI(docURIPrefix,vId,elementNumber,docClass),rd);
 
                 }
                 finally
@@ -1368,6 +1230,7 @@ public class FilenetConnector extends or
             throw new ManifoldCFException("IO Exception ingesting document '"+documentIdentifier+"': "+e.getMessage(),e);
           }
         }
+        
       }
       else
       {
@@ -1384,8 +1247,7 @@ public class FilenetConnector extends or
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("FileNet: Removing version '"+documentIdentifier+"' because it seems to no longer exist");
 
-            activities.noDocument(documentIdentifier,documentVersion);
-            i++;
+            activities.noDocument(documentIdentifier,versionString);
             continue;
           }
 
@@ -1414,8 +1276,7 @@ public class FilenetConnector extends or
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("FileNet: Removing version '"+documentIdentifier+"' because: "+e.getMessage(),e);
-            activities.noDocument(documentIdentifier,documentVersion);
-            i++;
+            activities.noDocument(documentIdentifier,versionString);
             continue;
           }
           else
@@ -1424,23 +1285,40 @@ public class FilenetConnector extends or
           }
         }
       }
-      i++;
+      
     }
+  }
+
+  /** Emulate the query matching for filenet sql expressions. */
+  protected static boolean performMatch(String matchType, String matchDocValue, String matchValue)
+  {
+    if (matchType.equals("="))
+      return matchDocValue.equalsIgnoreCase(matchValue);
+    else if (matchType.equals("!="))
+      return !matchDocValue.equalsIgnoreCase(matchValue);
 
+    // Do a LIKE comparison
+    return likeMatch(matchDocValue,0,matchValue,0);
   }
 
-  /** Free a set of documents.  This method is called for all documents whose versions have been fetched using
-  * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
-  * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to
-  * processDocuments() for the documents in question.
-  *@param documentIdentifiers is the set of document identifiers.
-  *@param versions is the corresponding set of version identifiers (individual identifiers may be null).
-  */
-  @Override
-  public void releaseDocumentVersions(String[] documentIdentifiers, String[] versions)
-    throws ManifoldCFException
+  /** Match a portion of a string with SQL wildcards (%) */
+  protected static boolean likeMatch(String matchDocValue, int matchDocPos, String matchValue, int matchPos)
   {
-    // Nothing to do
+    if (matchPos == matchValue.length())
+    {
+      return matchDocPos == matchDocValue.length();
+    }
+    if (matchDocPos == matchDocValue.length())
+    {
+      return matchValue.charAt(matchPos) == '%' && likeMatch(matchDocValue,matchDocPos,matchValue,matchPos+1);
+    }
+    char x = matchDocValue.charAt(matchDocPos);
+    char y = matchValue.charAt(matchPos);
+    if (y != '%')
+      return Character.toLowerCase(x) == Character.toLowerCase(y) && likeMatch(matchDocValue,matchDocPos+1,matchValue,matchPos+1);
+
+    return likeMatch(matchDocValue,matchDocPos+1,matchValue,matchPos) ||
+      likeMatch(matchDocValue,matchDocPos,matchValue,matchPos+1);
   }
 
   @Override
@@ -3302,11 +3180,25 @@ public class FilenetConnector extends or
       }
     }
 
-    public Throwable getException()
+    public void finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
-      return exception;
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
     }
-
   }
 
   /** Check connection, with appropriate retries */
@@ -3319,22 +3211,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       CheckConnectionThread t = new CheckConnectionThread();
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
+        t.finishUp();
         return;
       }
       catch (InterruptedException e)
@@ -3382,13 +3262,24 @@ public class FilenetConnector extends or
       }
     }
 
-    public Throwable getException()
-    {
-      return exception;
-    }
-
-    public DocumentClassDefinition[] getResponse()
+    public DocumentClassDefinition[] finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
       return rval;
     }
   }
@@ -3403,23 +3294,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetDocumentClassesInfoThread t = new GetDocumentClassesInfoThread();
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
-        return t.getResponse();
+        return t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -3468,13 +3346,24 @@ public class FilenetConnector extends or
       }
     }
 
-    public Throwable getException()
-    {
-      return exception;
-    }
-
-    public MetadataFieldDefinition[] getResponse()
+    public MetadataFieldDefinition[] finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
       return rval;
     }
   }
@@ -3489,23 +3378,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetDocumentClassesMetadataFieldsInfoThread t = new GetDocumentClassesMetadataFieldsInfoThread(documentClassName);
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
-        return t.getResponse();
+        return t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -3531,7 +3407,7 @@ public class FilenetConnector extends or
 
   protected class GetChildFoldersThread extends Thread
   {
-    protected String[] folderPath;
+    protected final String[] folderPath;
     protected String[] rval = null;
     protected Throwable exception = null;
 
@@ -3554,16 +3430,27 @@ public class FilenetConnector extends or
       }
     }
 
-    public String[] getResponse()
+    public String[] finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
       return rval;
     }
 
-    public Throwable getException()
-    {
-      return exception;
-    }
-
   }
 
 
@@ -3577,23 +3464,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetChildFoldersThread t = new GetChildFoldersThread(folderPath);
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
-        return t.getResponse();
+        return t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -3642,16 +3516,26 @@ public class FilenetConnector extends or
       }
     }
 
-    public String[] getResponse()
+    public String[] finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
       return rval;
     }
-
-    public Throwable getException()
-    {
-      return exception;
-    }
-
   }
   
   /** Get matching object id's for a given query */
@@ -3664,23 +3548,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetMatchingObjectIdsThread t = new GetMatchingObjectIdsThread(sql);
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
-        return t.getResponse();
+        return t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -3729,16 +3600,27 @@ public class FilenetConnector extends or
       }
     }
 
-    public Integer getResponse()
+    public Integer finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
       return rval;
     }
 
-    public Throwable getException()
-    {
-      return exception;
-    }
-
   }
 
   protected Integer doGetDocumentContentCount(String documentIdentifier)
@@ -3750,23 +3632,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetDocumentContentCountThread t = new GetDocumentContentCountThread(documentIdentifier);
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
-        return t.getResponse();
+        return t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -3793,12 +3662,12 @@ public class FilenetConnector extends or
 
   protected class GetDocumentInformationThread extends Thread
   {
-    protected String docId;
-    protected HashMap metadataFields;
+    protected final String docId;
+    protected final Map<String,Object> metadataFields;
     protected FileInfo rval = null;
     protected Throwable exception = null;
 
-    public GetDocumentInformationThread(String docId, HashMap metadataFields)
+    public GetDocumentInformationThread(String docId, Map<String,Object> metadataFields)
     {
       super();
       setDaemon(true);
@@ -3818,20 +3687,31 @@ public class FilenetConnector extends or
       }
     }
 
-    public FileInfo getResponse()
+    public FileInfo finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
       return rval;
     }
 
-    public Throwable getException()
-    {
-      return exception;
-    }
-
   }
 
   /** Get document info */
-  protected FileInfo doGetDocumentInformation(String docId, HashMap metadataFields)
+  protected FileInfo doGetDocumentInformation(String docId, Map<String,Object> metadataFields)
     throws FilenetException, ManifoldCFException, ServiceInterruption
   {
     while (true)
@@ -3840,23 +3720,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetDocumentInformationThread t = new GetDocumentInformationThread(docId,metadataFields);
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
-        return t.getResponse();
+        return t.finishUp();
       }
       catch (InterruptedException e)
       {
@@ -3882,9 +3749,10 @@ public class FilenetConnector extends or
 
   protected class GetDocumentContentsThread extends Thread
   {
-    protected String docId;
-    protected int elementNumber;
-    protected String tempFileName;
+    protected final String docId;
+    protected final int elementNumber;
+    protected final String tempFileName;
+    
     protected Throwable exception = null;
 
     public GetDocumentContentsThread(String docId, int elementNumber, String tempFileName)
@@ -3908,11 +3776,25 @@ public class FilenetConnector extends or
       }
     }
 
-    public Throwable getException()
+    public void finishUp()
+      throws RemoteException, FilenetException, InterruptedException
     {
-      return exception;
+      join();
+      Throwable thr = exception;
+      if (thr != null)
+      {
+        if (thr instanceof RemoteException)
+          throw (RemoteException)thr;
+        else if (thr instanceof FilenetException)
+          throw (FilenetException)thr;
+        else if (thr instanceof RuntimeException)
+          throw (RuntimeException)thr;
+        else if (thr instanceof Error)
+          throw (Error)thr;
+        else
+          throw new RuntimeException("Unexpected exception type: "+thr.getClass().getName()+": "+thr.getMessage(),thr);
+      }
     }
-
   }
 
   /** Get document contents */
@@ -3925,22 +3807,10 @@ public class FilenetConnector extends or
       getSession();
       long currentTime;
       GetDocumentContentsThread t = new GetDocumentContentsThread(docId,elementNumber,tempFileName);
+      t.start();
       try
       {
-        t.start();
-        t.join();
-        Throwable thr = t.getException();
-        if (thr != null)
-        {
-          if (thr instanceof RemoteException)
-            throw (RemoteException)thr;
-          else if (thr instanceof FilenetException)
-            throw (FilenetException)thr;
-          else if (thr instanceof RuntimeException)
-            throw (RuntimeException)thr;
-          else
-            throw (Error)thr;
-        }
+        t.finishUp();
         return;
       }
       catch (InterruptedException e)
@@ -3967,45 +3837,96 @@ public class FilenetConnector extends or
   }
 
 
-  // Utility methods
+  // Utility classes/methods
 
-  /** Grab forced acl out of document specification.
-  *@param spec is the document specification.
-  *@return the acls.
-  */
-  protected static String[] getAcls(DocumentSpecification spec)
+  protected static class SpecInfo
   {
-    HashMap map = new HashMap();
-    int i = 0;
-    boolean securityOn = true;
-    while (i < spec.getChildCount())
+    protected final Set<String> aclMap = new HashSet<String>();
+    protected final boolean securityOn;
+    protected final Map<String,DocClassSpec> docClassSpecs = new HashMap<String,DocClassSpec>();
+    protected final Map<String,Object> metadataFields = new HashMap<String,Object>();
+
+    public SpecInfo(Specification spec)
     {
-      SpecificationNode sn = spec.getChild(i++);
-      if (sn.getType().equals("access"))
+      boolean securityOn = true;
+      for (int i = 0; i < spec.getChildCount(); i++)
       {
-        String token = sn.getAttributeValue("token");
-        map.put(token,token);
+        SpecificationNode sn = spec.getChild(i);
+        if (sn.getType().equals("access"))
+        {
+          String token = sn.getAttributeValue("token");
+          aclMap.add(token);
+        }
+        else if (sn.getType().equals("security"))
+        {
+          String value = sn.getAttributeValue("value");
+          if (value.equals("on"))
+            securityOn = true;
+          else if (value.equals("off"))
+            securityOn = false;
+        }
+        else if (sn.getType().equals(SPEC_NODE_DOCUMENTCLASS))
+        {
+          String value = sn.getAttributeValue(SPEC_ATTRIBUTE_VALUE);
+          DocClassSpec classSpec = new DocClassSpec(sn);
+          docClassSpecs.put(value,classSpec);
+          if (classSpec.getAllMetadata())
+            metadataFields.put(value,new Boolean(true));
+          else
+          {
+            Set<String> sumMap = new HashSet<String>();
+            int j = 0;
+            String[] fields = classSpec.getMetadataFields();
+            for (String field : fields)
+            {
+              sumMap.add(field);
+            }
+            for (j = 0; j < classSpec.getMatchCount(); j++)
+            {
+              sumMap.add(classSpec.getMatchField(j));
+            }
+            // Convert to an array
+            String[] fieldArray = new String[sumMap.size()];
+            j = 0;
+            for (String field : sumMap)
+            {
+              fieldArray[j++] = field;
+            }
+            metadataFields.put(value,fieldArray);
+          }
+        }
+
       }
-      else if (sn.getType().equals("security"))
+      
+      this.securityOn = securityOn;
+
+    }
+    
+    public String[] getAcls()
+    {
+      if (!securityOn)
+        return null;
+
+      String[] rval = new String[aclMap.size()];
+      int i = 0;
+      for (String acl : aclMap)
       {
-        String value = sn.getAttributeValue("value");
-        if (value.equals("on"))
-          securityOn = true;
-        else if (value.equals("off"))
-          securityOn = false;
+        rval[i++] = acl;
       }
+      return rval;
+    }
+    
+    public DocClassSpec getDocClassSpec(String docClass)
+    {
+      return docClassSpecs.get(docClass);
     }
-    if (!securityOn)
-      return null;
 
-    String[] rval = new String[map.size()];
-    Iterator iter = map.keySet().iterator();
-    i = 0;
-    while (iter.hasNext())
+    public Map<String,Object> getMetadataFields()
     {
-      rval[i++] = (String)iter.next();
+      return metadataFields;
     }
-    return rval;
-  }
+  
 
+  }
+  
 }

Modified: manifoldcf/branches/dev_1x/connectors/filenet/implementation/src/main/java/org/apache/manifoldcf/crawler/common/filenet/FilenetImpl.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/filenet/implementation/src/main/java/org/apache/manifoldcf/crawler/common/filenet/FilenetImpl.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/filenet/implementation/src/main/java/org/apache/manifoldcf/crawler/common/filenet/FilenetImpl.java (original)
+++ manifoldcf/branches/dev_1x/connectors/filenet/implementation/src/main/java/org/apache/manifoldcf/crawler/common/filenet/FilenetImpl.java Fri Sep 19 14:22:27 2014
@@ -349,7 +349,7 @@ public class FilenetImpl extends Unicast
   /** Get document information for a given filenet document.  Will return null if the version id is not a current document version id.
   * The metadataFields hashmap is keyed by document class, and contains as a value either Boolean(true) (meaning "all"), or a String[] that has the
   * list of fields desired. */
-  public FileInfo getDocumentInformation(String docId, HashMap metadataFields)
+  public FileInfo getDocumentInformation(String docId, Map<String,Object> metadataFields)
     throws FilenetException, RemoteException
   {
     //System.out.println("Looking for document information on "+docId);

Modified: manifoldcf/branches/dev_1x/connectors/filenet/interface/src/main/java/org/apache/manifoldcf/crawler/common/filenet/IFilenet.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/filenet/interface/src/main/java/org/apache/manifoldcf/crawler/common/filenet/IFilenet.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/filenet/interface/src/main/java/org/apache/manifoldcf/crawler/common/filenet/IFilenet.java (original)
+++ manifoldcf/branches/dev_1x/connectors/filenet/interface/src/main/java/org/apache/manifoldcf/crawler/common/filenet/IFilenet.java Fri Sep 19 14:22:27 2014
@@ -79,7 +79,7 @@ public interface IFilenet extends Remote
   /** Get document information for a given filenet document.  Will return null if the version id is not a current document version id.
   * The metadataFields hashmap is keyed by document class, and contains as a value either Boolean(true) (meaning "all"), or a String[] that has the
   * list of fields desired. */
-  public FileInfo getDocumentInformation(String docId, HashMap metadataFields)
+  public FileInfo getDocumentInformation(String docId, Map<String,Object> metadataFields)
     throws FilenetException, RemoteException;
 
   /** Get document contents */

Modified: manifoldcf/branches/dev_1x/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1626228&r1=1626227&r2=1626228&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/branches/dev_1x/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Fri Sep 19 14:22:27 2014
@@ -186,35 +186,26 @@ public class FileConnector extends org.a
   }
 
 
-  /** Get document versions given an array of document identifiers.
-  * This method is called for EVERY document that is considered. It is therefore important to perform
-  * as little work as possible here.
+  /** Process a set of documents.
+  * This is the method that should cause each document to be fetched, processed, and the results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
+  * The document specification allows this class to filter what is done based on the job.
   * The connector will be connected before this method can be called.
-  *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
-  *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
-  *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
-  *   had an empty version string.
-  *@param activities is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is the current document specification for the current job.  If there is a dependency on this
-  * specification, then the version string should include the pertinent data, so that reingestion will occur
-  * when the specification changes.  This is primarily useful for metadata.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param statuses are the currently-stored document versions for each document in the set of document identifiers
+  * passed in above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
-  *@return the corresponding version strings, with null in the places where the document no longer exists.
-  * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
-  * will always be processed.
   */
-  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
-    DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
+  @Override
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption
   {
-    int i = 0;
-    
-    String[] rval = new String[documentIdentifiers.length];
-    i = 0;
-    while (i < rval.length)
+    for (String documentIdentifier : documentIdentifiers)
     {
-      String documentIdentifier = documentIdentifiers[i];
       File file = new File(documentIdentifier);
       if (file.exists())
       {
@@ -226,70 +217,7 @@ public class FileConnector extends org.a
           //long lastModified = file.lastModified();
           //rval[i] = new Long(lastModified).toString();
 
-          // Signal that we don't have any versioning and that we should recheck always.
-          rval[i] = "";
-        }
-        else
-        {
-          // It's a file
-          long fileLength = file.length();
-          if (activities.checkLengthIndexable(fileLength))
-          {
-            // Get the file's modified date.
-            long lastModified = file.lastModified();
-            
-            // Check if the path is to be converted.  We record that info in the version string so that we'll reindex documents whose
-            // URI's change.
-            String convertPath = findConvertPath(spec, file);
-            StringBuilder sb = new StringBuilder();
-            if (convertPath != null)
-            {
-              // Record the path.
-              sb.append("+");
-              pack(sb,convertPath,'+');
-            }
-            else
-              sb.append("-");
-            sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
-            rval[i] = sb.toString();
-          }
-          else
-            rval[i] = null;
-        }
-      }
-      else
-        rval[i] = null;
-      i++;
-    }
-    return rval;
-  }
-
-
-  /** Process a set of documents.
-  * This is the method that should cause each document to be fetched, processed, and the results either added
-  * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
-  * The document specification allows this class to filter what is done based on the job.
-  *@param documentIdentifiers is the set of document identifiers to process.
-  *@param activities is the interface this method should use to queue up new document references
-  * and ingest documents.
-  *@param spec is the document specification.
-  *@param scanOnly is an array corresponding to the document identifiers.  It is set to true to indicate when the processing
-  * should only find other references, and should not actually call the ingestion methods.
-  */
-  @Override
-  public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities, DocumentSpecification spec, boolean[] scanOnly)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    int i = 0;
-    while (i < documentIdentifiers.length)
-    {
-      String version = versions[i];
-      String documentIdentifier = documentIdentifiers[i];
-      File file = new File(documentIdentifier);
-      if (file.exists())
-      {
-        if (file.isDirectory())
-        {
+          // No versioning; just reference children
           // Chained connectors scan parent nodes always
           // Queue up stuff for directory
           long startTime = System.currentTimeMillis();
@@ -324,31 +252,50 @@ public class FileConnector extends org.a
           {
             activities.recordActivity(new Long(startTime),ACTIVITY_READ,null,entityReference,errorCode,errorDesc,null);
           }
+          continue;
         }
         else
         {
-          if (!scanOnly[i])
+          // It's a file
+          String versionString;
+          String convertPath;
+          long fileLength = file.length();
+          if (activities.checkLengthIndexable(fileLength))
+          {
+            // Get the file's modified date.
+            long lastModified = file.lastModified();
+            
+            // Check if the path is to be converted.  We record that info in the version string so that we'll reindex documents whose
+            // URIs change.
+            convertPath = findConvertPath(spec, file);
+            StringBuilder sb = new StringBuilder();
+            if (convertPath != null)
+            {
+              // Record the path.
+              sb.append("+");
+              pack(sb,convertPath,'+');
+            }
+            else
+              sb.append("-");
+            sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
+            versionString = sb.toString();
+          }
+          else
+          {
+            activities.deleteDocument(documentIdentifier);
+            continue;
+          }
+    
+          if (activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
           {
             // We've already avoided queuing documents that we don't want, based on file specifications.
             // We still need to check based on file data.
             if (checkIngest(file,spec))
             {
-              
-              /*
-               * get filepathtouri value
-               */
-              String convertPath = null;
-              if (version.length() > 0 && version.startsWith("+"))
-              {
-                StringBuilder unpack = new StringBuilder();
-                unpack(unpack, version, 1, '+');
-                convertPath = unpack.toString();
-              }
-              
               long startTime = System.currentTimeMillis();
               String errorCode = "OK";
               String errorDesc = null;
-              Long fileLength = null;
+              Long fileLengthLong = null;
               String entityDescription = documentIdentifier;
               try
               {
@@ -358,9 +305,8 @@ public class FileConnector extends org.a
                   InputStream is = new FileInputStream(file);
                   try
                   {
-                    long fileBytes = file.length();
                     RepositoryDocument data = new RepositoryDocument();
-                    data.setBinary(is,fileBytes);
+                    data.setBinary(is,fileLength);
                     String fileName = file.getName();
                     data.setFileName(fileName);
                     data.setMimeType(mapExtensionToMimeType(fileName));
@@ -375,8 +321,8 @@ public class FileConnector extends org.a
                       data.addField("uri",file.toString());
                     }
                     // MHL for other metadata
-                    activities.ingestDocumentWithException(documentIdentifier,version,uri,data);
-                    fileLength = new Long(fileBytes);
+                    activities.ingestDocumentWithException(documentIdentifier,versionString,uri,data);
+                    fileLengthLong = new Long(fileLength);
                   }
                   finally
                   {
@@ -401,13 +347,17 @@ public class FileConnector extends org.a
               }
               finally
               {
-                activities.recordActivity(new Long(startTime),ACTIVITY_READ,fileLength,entityDescription,errorCode,errorDesc,null);
+                activities.recordActivity(new Long(startTime),ACTIVITY_READ,fileLengthLong,entityDescription,errorCode,errorDesc,null);
               }
             }
           }
         }
       }
-      i++;
+      else
+      {
+        activities.deleteDocument(documentIdentifier);
+        continue;
+      }
     }
   }
 
@@ -417,7 +367,7 @@ public class FileConnector extends org.a
   *@param documentIdentifier is the document identifier.
   *@return the part of the path to be converted, or null.
   */
-  protected static String findConvertPath(DocumentSpecification spec, File theFile)
+  protected static String findConvertPath(Specification spec, File theFile)
   {
     String fullpath = theFile.getAbsolutePath().replaceAll("\\\\","/");
     for (int j = 0; j < spec.getChildCount(); j++)
@@ -1074,7 +1024,7 @@ public class FileConnector extends org.a
   *@param documentSpecification is the specification.
   *@return true if it should be included.
   */
-  protected static boolean checkInclude(File file, String fileName, DocumentSpecification documentSpecification)
+  protected static boolean checkInclude(File file, String fileName, Specification documentSpecification)
     throws ManifoldCFException
   {
     if (Logging.connectors.isDebugEnabled())
@@ -1179,7 +1129,7 @@ public class FileConnector extends org.a
   *@param file is the file.
   *@param documentSpecification is the specification.
   */
-  protected static boolean checkIngest(File file, DocumentSpecification documentSpecification)
+  protected static boolean checkIngest(File file, Specification documentSpecification)
     throws ManifoldCFException
   {
     // Since the only exclusions at this point are not based on file contents, this is a no-op.



Mime
View raw message