From commits-return-9882-archive-asf-public=cust-asf.ponee.io@manifoldcf.apache.org Sun Sep 30 15:59:57 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id AB79718064A for ; Sun, 30 Sep 2018 15:59:56 +0200 (CEST) Received: (qmail 90568 invoked by uid 500); 30 Sep 2018 13:59:55 -0000 Mailing-List: contact commits-help@manifoldcf.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@manifoldcf.apache.org Delivered-To: mailing list commits@manifoldcf.apache.org Received: (qmail 90559 invoked by uid 99); 30 Sep 2018 13:59:55 -0000 Received: from Unknown (HELO svn01-us-west.apache.org) (209.188.14.144) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 30 Sep 2018 13:59:55 +0000 Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id E28AB3A0162 for ; Sun, 30 Sep 2018 14:00:01 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1842375 - /manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java Date: Sun, 30 Sep 2018 14:00:01 -0000 To: commits@manifoldcf.apache.org From: kwright@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20180930140001.E28AB3A0162@svn01-us-west.apache.org> Author: kwright Date: Sun Sep 30 14:00:01 2018 New Revision: 1842375 URL: http://svn.apache.org/viewvc?rev=1842375&view=rev Log: Fix for CONNECTORS-1532. Modified: manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java Modified: manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java?rev=1842375&r1=1842374&r2=1842375&view=diff ============================================================================== --- manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java (original) +++ manifoldcf/trunk/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java Sun Sep 30 14:00:01 2018 @@ -1484,7 +1484,20 @@ public class DCTM extends org.apache.man String objName = object.getObjectName(); String contentType = object.getContentType(); - + // Check if content type is one of the allowed ones + if (!sDesc.contentTypeMatches(contentType)) + { + activityStatus = "MIMETYPEOUTOFSET"; + return; + } + String[] pathString = sDesc.getPathAttributeValue(object); + // Check if one of the paths is in the allowed set + if (!sDesc.pathMatches(pathString)) + { + activityStatus = "PATHMOVED"; + return; + } + // This particular way of getting content failed, because DFC loaded the // whole object into memory (very very bad DFC!) // InputStream is = objIDfSysObject.getContent(); @@ -1550,7 +1563,6 @@ public class DCTM extends org.apache.man String pathAttributeName = sDesc.getPathAttributeName(); if (pathAttributeName != null && pathAttributeName.length() > 0) { - String[] pathString = sDesc.getPathAttributeValue(object); rval.addField(pathAttributeName,pathString); } @@ -4367,24 +4379,55 @@ public class DCTM extends org.apache.man protected final boolean securityOn; /** Map of type to selected attributes */ protected final Map> typeMap = new HashMap>(); - + /** Set of allowed paths */ + protected final Set pathSet = new HashSet<>(); + /** Set of allowed mime types; null if all are allowed */ + protected final Set mimeTypeSet; + /** Constructor */ public SpecInfo(Specification spec) throws ManifoldCFException, ServiceInterruption { + Set mimeTypeSet = null; + boolean allMimeTypes = false; String pathAttributeName = null; boolean securityOn = true; for (int i = 0; i < spec.getChildCount(); i++) { SpecificationNode n = spec.getChild(i); - if (n.getType().equals(CONFIG_PARAM_PATHNAMEATTRIBUTE)) + if (n.getType().equals(CONFIG_PARAM_FORMAT_ALL)) + { + String all = n.getAttributeValue("value"); + if (all.equals("true")) + { + allMimeTypes = true; + } + } + else if (n.getType().equals(CONFIG_PARAM_FORMAT)) + { + String docType = n.getAttributeValue("value"); + if (mimeTypeSet == null) + mimeTypeSet = new HashSet(); + mimeTypeSet.add(docType); + } + else if (n.getType().equals(CONFIG_PARAM_PATHNAMEATTRIBUTE)) + { pathAttributeName = n.getAttributeValue("value"); + } else if (n.getType().equals(CONFIG_PARAM_PATHMAP)) { String pathMatch = n.getAttributeValue("match"); String pathReplace = n.getAttributeValue("replace"); matchMap.appendMatchPair(pathMatch,pathReplace); } + else if (n.getType().equals(CONFIG_PARAM_LOCATION)) + { + String strLocation = n.getAttributeValue("path"); + if (strLocation != null && strLocation.length() > 0) + { + pathSet.add(strLocation); + } + } else if (n.getType().equals("access")) { String token = n.getAttributeValue("token"); @@ -4441,6 +4484,21 @@ public class DCTM extends org.apache.man } this.pathAttributeName = pathAttributeName; this.securityOn = securityOn; + if (allMimeTypes) + { + this.mimeTypeSet = null; + } + else + { + if (mimeTypeSet == null) + { + this.mimeTypeSet = new HashSet<>(0); + } + else + { + this.mimeTypeSet = mimeTypeSet; + } + } } /** Get the path attribute name. @@ -4465,6 +4523,37 @@ public class DCTM extends org.apache.man return rval; } + /** Check if a set of paths contains one that matches the spec. + *@param documentPaths is the set of paths the document has. + *@return true if it does, false if not. + */ + public boolean pathMatches(final String[] documentPaths) + { + if (pathSet.size() == 0) { + return true; + } + for (final String path : documentPaths) { + if (pathSet.contains(path)) { + return true; + } + } + return false; + } + + /** Check if a document content type matches the spec. + *@param contentType is the mime type that the document has. + *@return true if it does, false if not. + */ + public boolean contentTypeMatches(final String contentType) + { + // Implement if we need to. It's not clear that the mime type of a document can change after-the-fact. + if (mimeTypeSet == null) { + return true; + } + final boolean rval = mimeTypeSet.contains(contentType); + return rval; + } + /** Grab forced acl out of document specification. *@param spec is the document specification. *@return the acls.