manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1842375 - /manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
Date Sun, 30 Sep 2018 14:00:01 GMT
Author: kwright
Date: Sun Sep 30 14:00:01 2018
New Revision: 1842375

URL: http://svn.apache.org/viewvc?rev=1842375&view=rev
Log:
Fix for CONNECTORS-1532.

Modified:
    manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java

Modified: manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java?rev=1842375&r1=1842374&r2=1842375&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java	(original)
+++ manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java	Sun Sep 30 14:00:01 2018
@@ -1484,7 +1484,20 @@ public class DCTM extends org.apache.man
           
           String objName = object.getObjectName();
           String contentType = object.getContentType();
-            
+          // Check if content type is one of the allowed ones
+          if (!sDesc.contentTypeMatches(contentType))
+          {
+            activityStatus = "MIMETYPEOUTOFSET";
+            return;
+          }
+          String[] pathString = sDesc.getPathAttributeValue(object);
+          // Check if one of the paths is in the allowed set
+          if (!sDesc.pathMatches(pathString))
+          {
+            activityStatus = "PATHMOVED";
+            return;
+          }
+          
           // This particular way of getting content failed, because DFC loaded the
           // whole object into memory (very very bad DFC!)
           // InputStream is = objIDfSysObject.getContent();
@@ -1550,7 +1563,6 @@ public class DCTM extends org.apache.man
           String pathAttributeName = sDesc.getPathAttributeName();
           if (pathAttributeName != null && pathAttributeName.length() > 0)
           {
-            String[] pathString = sDesc.getPathAttributeValue(object);
             rval.addField(pathAttributeName,pathString);
           }
 
@@ -4367,24 +4379,55 @@ public class DCTM extends org.apache.man
     protected final boolean securityOn;
     /** Map of type to selected attributes */
     protected final Map<String,List<String>> typeMap = new HashMap<String,List<String>>();
-
+    /** Set of allowed paths */
+    protected final Set<String> pathSet = new HashSet<>();
+    /** Set of allowed mime types; null if all are allowed */
+    protected final Set<String> mimeTypeSet;
+    
     /** Constructor */
     public SpecInfo(Specification spec)
       throws ManifoldCFException, ServiceInterruption
     {
+      Set<String> mimeTypeSet = null;
+      boolean allMimeTypes = false;
       String pathAttributeName = null;
       boolean securityOn = true;
       for (int i = 0; i < spec.getChildCount(); i++)
       {
         SpecificationNode n = spec.getChild(i);
-        if (n.getType().equals(CONFIG_PARAM_PATHNAMEATTRIBUTE))
+        if (n.getType().equals(CONFIG_PARAM_FORMAT_ALL))
+        {
+          String all = n.getAttributeValue("value");
+          if (all.equals("true"))
+          {
+            allMimeTypes = true;
+          }
+        }
+        else if (n.getType().equals(CONFIG_PARAM_FORMAT))
+        {
+          String docType = n.getAttributeValue("value");
+          if (mimeTypeSet == null)
+            mimeTypeSet = new HashSet<String>();
+          mimeTypeSet.add(docType);
+        }
+        else if (n.getType().equals(CONFIG_PARAM_PATHNAMEATTRIBUTE))
+        {
           pathAttributeName = n.getAttributeValue("value");
+        }
         else if (n.getType().equals(CONFIG_PARAM_PATHMAP))
         {
           String pathMatch = n.getAttributeValue("match");
           String pathReplace = n.getAttributeValue("replace");
           matchMap.appendMatchPair(pathMatch,pathReplace);
         }
+        else if (n.getType().equals(CONFIG_PARAM_LOCATION))
+        {
+          String strLocation = n.getAttributeValue("path");
+          if (strLocation != null && strLocation.length() > 0)
+          {
+            pathSet.add(strLocation);
+          }
+        }
         else if (n.getType().equals("access"))
         {
           String token = n.getAttributeValue("token");
@@ -4441,6 +4484,21 @@ public class DCTM extends org.apache.man
       }
       this.pathAttributeName = pathAttributeName;
       this.securityOn = securityOn;
+      if (allMimeTypes)
+      {
+        this.mimeTypeSet = null;
+      }
+      else
+      {
+        if (mimeTypeSet == null)
+        {
+          this.mimeTypeSet = new HashSet<>(0);
+        }
+        else
+        {
+          this.mimeTypeSet = mimeTypeSet;
+        }
+      }
     }
 
     /** Get the path attribute name.
@@ -4465,6 +4523,37 @@ public class DCTM extends org.apache.man
       return rval;
     }
 
+    /** Check if a set of paths contains one that matches the spec.
+    *@param documentPaths is the set of paths the document has.
+    *@return true if it does, false if not.
+    */
+    public boolean pathMatches(final String[] documentPaths)
+    {
+      if (pathSet.size() == 0) {
+        return true;
+      }
+      for (final String path : documentPaths) {
+        if (pathSet.contains(path)) {
+          return true;
+        }
+      }
+      return false;
+    }
+ 
+    /** Check if a document content type matches the spec.
+    *@param contentType is the mime type that the document has.
+    *@return true if it does, false if not.
+    */
+    public boolean contentTypeMatches(final String contentType)
+    {
+      // Implement if we need to.  It's not clear that the mime type of a document can change after-the-fact.
+      if (mimeTypeSet == null) {
+        return true;
+      }
+      final boolean rval = mimeTypeSet.contains(contentType);
+      return rval;
+    }
+    
     /** Grab forced acl out of document specification.
     *@param spec is the document specification.
     *@return the acls.



Mime
View raw message