Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 81145200CD8 for ; Wed, 19 Jul 2017 07:50:33 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 7F83A1682CA; Wed, 19 Jul 2017 05:50:33 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 7421F1682C8 for ; Wed, 19 Jul 2017 07:50:32 +0200 (CEST) Received: (qmail 18911 invoked by uid 500); 19 Jul 2017 05:50:31 -0000 Mailing-List: contact commits-help@atlas.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@atlas.apache.org Delivered-To: mailing list commits@atlas.apache.org Received: (qmail 18902 invoked by uid 99); 19 Jul 2017 05:50:31 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 19 Jul 2017 05:50:31 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 2B0D71A0B4E for ; Wed, 19 Jul 2017 05:50:31 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -4.222 X-Spam-Level: X-Spam-Status: No, score=-4.222 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.001, SPF_PASS=-0.001] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id T4AeGY0usuQA for ; Wed, 19 Jul 2017 05:50:28 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with SMTP id 525AF5FBCD for ; Wed, 19 Jul 2017 05:50:28 +0000 (UTC) Received: (qmail 18891 invoked by uid 99); 19 Jul 2017 05:50:27 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 19 Jul 2017 05:50:27 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B612CE8E6B; Wed, 19 Jul 2017 05:50:27 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: madhan@apache.org To: commits@atlas.incubator.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: incubator-atlas git commit: ATLAS-1961: Basic search improvement in use of Solr index for attribute filtering (# 2) Date: Wed, 19 Jul 2017 05:50:27 +0000 (UTC) archived-at: Wed, 19 Jul 2017 05:50:33 -0000 Repository: incubator-atlas Updated Branches: refs/heads/0.8-incubating 17446d009 -> 7592bd4f3 ATLAS-1961: Basic search improvement in use of Solr index for attribute filtering (# 2) (cherry picked from commit cfb6b84f41b05275db826bbd43e4c39145b6d2d5) Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/7592bd4f Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/7592bd4f Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/7592bd4f Branch: refs/heads/0.8-incubating Commit: 7592bd4f3c843566f3e78b3fbfdffe7b49db2fe4 Parents: 17446d0 Author: Madhan Neethiraj Authored: Tue Jul 18 13:27:46 2017 -0700 Committer: Madhan Neethiraj Committed: Tue Jul 18 22:50:10 2017 -0700 ---------------------------------------------------------------------- .../atlas/discovery/EntitySearchProcessor.java | 30 +++++++++++----- .../discovery/FullTextSearchProcessor.java | 36 ++++++++++++++++++-- .../apache/atlas/discovery/SearchContext.java | 9 +++-- .../apache/atlas/discovery/SearchProcessor.java | 4 +-- 4 files changed, 61 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/7592bd4f/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java index 3204ecf..50376ef 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java @@ -20,6 +20,7 @@ package org.apache.atlas.discovery; import org.apache.atlas.model.discovery.SearchParameters.FilterCriteria; import org.apache.atlas.repository.Constants; import org.apache.atlas.repository.graphdb.*; +import org.apache.atlas.type.AtlasClassificationType; import org.apache.atlas.type.AtlasEntityType; import org.apache.atlas.utils.AtlasPerfTracer; import org.apache.commons.collections.CollectionUtils; @@ -39,18 +40,21 @@ public class EntitySearchProcessor extends SearchProcessor { public EntitySearchProcessor(SearchContext context) { super(context); - final AtlasEntityType entityType = context.getEntityType(); - final FilterCriteria filterCriteria = context.getSearchParameters().getEntityFilters(); - final Set typeAndSubTypes = entityType.getTypeAndAllSubTypes(); - final Set solrAttributes = new HashSet<>(); - final Set gremlinAttributes = new HashSet<>(); - final Set allAttributes = new HashSet<>(); + final AtlasEntityType entityType = context.getEntityType(); + final FilterCriteria filterCriteria = context.getSearchParameters().getEntityFilters(); + final Set typeAndSubTypes = entityType.getTypeAndAllSubTypes(); + final Set solrAttributes = new HashSet<>(); + final Set gremlinAttributes = new HashSet<>(); + final Set allAttributes = new HashSet<>(); + + final AtlasClassificationType classificationType = context.getClassificationType(); + final boolean filterClassification = classificationType != null && !context.needClassificationProcessor(); processSearchAttributes(entityType, filterCriteria, solrAttributes, gremlinAttributes, allAttributes); - final boolean typeSearchBySolr = typeAndSubTypes.size() <= MAX_ENTITY_TYPES_IN_INDEX_QUERY; - final boolean attrSearchBySolr = canApplySolrFilter(entityType, filterCriteria, false); + final boolean typeSearchBySolr = !filterClassification && typeAndSubTypes.size() <= MAX_ENTITY_TYPES_IN_INDEX_QUERY; + final boolean attrSearchBySolr = !filterClassification && CollectionUtils.isNotEmpty(solrAttributes) && canApplySolrFilter(entityType, filterCriteria, false); StringBuilder solrQuery = new StringBuilder(); @@ -82,6 +86,10 @@ public class EntitySearchProcessor extends SearchProcessor { query.in(Constants.TYPE_NAME_PROPERTY_KEY, typeAndSubTypes); } + if (filterClassification) { + query.in(Constants.TRAIT_NAMES_PROPERTY_KEY, classificationType.getTypeAndAllSubTypes()); + } + graphQuery = toGremlinFilterQuery(entityType, filterCriteria, gremlinAttributes, query); if (context.getSearchParameters().getExcludeDeletedEntities() && indexQuery == null) { @@ -93,6 +101,10 @@ public class EntitySearchProcessor extends SearchProcessor { AtlasGraphQuery query = context.getGraph().query().in(Constants.TYPE_NAME_PROPERTY_KEY, typeAndSubTypes); + if (filterClassification) { + query.in(Constants.TRAIT_NAMES_PROPERTY_KEY, classificationType.getTypeAndAllSubTypes()); + } + filterGraphQuery = toGremlinFilterQuery(entityType, filterCriteria, allAttributes, query); if (context.getSearchParameters().getExcludeDeletedEntities()) { @@ -115,7 +127,7 @@ public class EntitySearchProcessor extends SearchProcessor { } try { - int qryOffset = (nextProcessor == null) ? context.getSearchParameters().getOffset() : 0; + int qryOffset = (nextProcessor == null && (graphQuery == null || indexQuery == null)) ? context.getSearchParameters().getOffset() : 0; int limit = context.getSearchParameters().getLimit(); int resultIdx = qryOffset; http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/7592bd4f/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java index 4ddd642..83368c2 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java @@ -22,12 +22,14 @@ import org.apache.atlas.repository.Constants; import org.apache.atlas.repository.graphdb.AtlasIndexQuery; import org.apache.atlas.repository.graphdb.AtlasVertex; import org.apache.atlas.utils.AtlasPerfTracer; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Set; public class FullTextSearchProcessor extends SearchProcessor { @@ -40,9 +42,39 @@ public class FullTextSearchProcessor extends SearchProcessor { super(context); SearchParameters searchParameters = context.getSearchParameters(); - String queryString = String.format("v.\"%s\":(%s)", Constants.ENTITY_TEXT_PROPERTY_KEY, searchParameters.getQuery()); + StringBuilder queryString = new StringBuilder(); - indexQuery = context.getGraph().indexQuery(Constants.FULLTEXT_INDEX, queryString); + queryString.append("v.\"").append(Constants.ENTITY_TEXT_PROPERTY_KEY).append("\":(").append(searchParameters.getQuery()); + + // if search includes entity-type criteria, adding a filter here can help avoid unnecessary + // processing (and rejection) by subsequent EntitySearchProcessor + if (context.getEntityType() != null) { + Set typeAndSubTypeNames = context.getEntityType().getTypeAndAllSubTypes(); + + if (typeAndSubTypeNames.size() <= MAX_ENTITY_TYPES_IN_INDEX_QUERY) { + queryString.append(AND_STR).append("(").append(StringUtils.join(typeAndSubTypeNames, SPACE_STRING)).append(")"); + } else { + LOG.warn("'{}' has too many subtypes ({}) to include in index-query; might cause poor performance", + context.getEntityType().getTypeName(), typeAndSubTypeNames.size()); + } + } + + // if search includes classification criteria, adding a filter here can help avoid unnecessary + // processing (and rejection) by subsequent ClassificationSearchProcessor or EntitySearchProcessor + if (context.getClassificationType() != null) { + Set typeAndSubTypeNames = context.getClassificationType().getTypeAndAllSubTypes(); + + if (typeAndSubTypeNames.size() <= MAX_CLASSIFICATION_TYPES_IN_INDEX_QUERY) { + queryString.append(AND_STR).append("(").append(StringUtils.join(typeAndSubTypeNames, SPACE_STRING)).append(")"); + } else { + LOG.warn("'{}' has too many subtypes ({}) to include in index-query; might cause poor performance", + context.getEntityType().getTypeName(), typeAndSubTypeNames.size()); + } + } + + queryString.append(")"); + + indexQuery = context.getGraph().indexQuery(Constants.FULLTEXT_INDEX, queryString.toString()); } @Override http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/7592bd4f/repository/src/main/java/org/apache/atlas/discovery/SearchContext.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchContext.java b/repository/src/main/java/org/apache/atlas/discovery/SearchContext.java index 8dd7667..929f8d0 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/SearchContext.java +++ b/repository/src/main/java/org/apache/atlas/discovery/SearchContext.java @@ -61,7 +61,6 @@ public class SearchContext { if (needEntityProcessor()) { addProcessor(new EntitySearchProcessor(this)); - } } @@ -104,15 +103,15 @@ public class SearchContext { return toString(new StringBuilder()).toString(); } - public boolean needFullTextrocessor() { + boolean needFullTextrocessor() { return StringUtils.isNotEmpty(searchParameters.getQuery()); } - public boolean needClassificationProcessor() { - return classificationType != null; + boolean needClassificationProcessor() { + return classificationType != null && (entityType == null || hasAttributeFilter(searchParameters.getTagFilters())); } - public boolean needEntityProcessor() { + boolean needEntityProcessor() { return entityType != null; } http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/7592bd4f/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java index ff0bd2e..596b43b 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java @@ -181,7 +181,7 @@ public abstract class SearchProcessor { protected void constructTypeTestQuery(StringBuilder solrQuery, Set typeAndAllSubTypes) { String typeAndSubtypesString = StringUtils.join(typeAndAllSubTypes, SPACE_STRING); - solrQuery.append("v.\"__typeName\": (") + solrQuery.append("v.\"").append(Constants.TYPE_NAME_PROPERTY_KEY).append("\": (") .append(typeAndSubtypesString) .append(")"); } @@ -206,7 +206,7 @@ public abstract class SearchProcessor { solrQuery.append(AND_STR); } - solrQuery.append("v.\"__state\":").append("ACTIVE"); + solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE"); } }