atlas-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mad...@apache.org
Subject [2/2] atlas git commit: ATLAS-2091: basic search update to avoid index query for attribute values containing Tokenizer characters
Date Thu, 14 Sep 2017 20:22:27 GMT
ATLAS-2091: basic search update to avoid index query for attribute values containing Tokenizer characters

Signed-off-by: Madhan Neethiraj <madhan@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/d1c585a2
Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/d1c585a2
Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/d1c585a2

Branch: refs/heads/master
Commit: d1c585a22c4c1abd866e553480efbd21f9cffcd0
Parents: a785e93
Author: apoorvnaik <apoorvnaik@apache.org>
Authored: Thu Sep 14 11:37:06 2017 -0700
Committer: Madhan Neethiraj <madhan@apache.org>
Committed: Thu Sep 14 12:51:55 2017 -0700

----------------------------------------------------------------------
 .../apache/atlas/discovery/SearchProcessor.java | 129 ++++++++++++++-----
 1 file changed, 96 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/atlas/blob/d1c585a2/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
index f6ff8d4..64a86b9 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
@@ -168,7 +168,7 @@ public abstract class SearchProcessor {
                 if (isIndexSearchable(filterCriteria, structType)) {
                     indexFiltered.add(attributeName);
                 } else {
-                    LOG.warn("not using index-search for attribute '{}' - its either non-indexed
or a string attribute used with NEQ operator; might cause poor performance", structType.getQualifiedAttributeName(attributeName));
+                    LOG.warn("not using index-search for attribute '{}'; might cause poor
performance", structType.getQualifiedAttributeName(attributeName));
 
                     graphFiltered.add(attributeName);
                 }
@@ -330,16 +330,34 @@ public abstract class SearchProcessor {
         boolean     ret           = indexedKeys != null && indexedKeys.contains(qualifiedName);
 
         if (ret) { // index exists
-            // Don't use index query for NEQ on string type attributes - as it might return
fewer entries due to tokenization of vertex property value by indexer
-            if (filterCriteria.getOperator() == SearchParameters.Operator.NEQ) {
-                AtlasType attributeType = structType.getAttributeType(filterCriteria.getAttributeName());
+            // for string type attributes, don't use index query in the following cases:
+            //   - operation is NEQ, as it might return fewer entries due to tokenization
of vertex property value
+            //   - value-to-compare has special characters
+            AtlasType attributeType = structType.getAttributeType(filterCriteria.getAttributeName());
+
+            if (AtlasBaseTypeDef.ATLAS_TYPE_STRING.equals(attributeType.getTypeName())) {
+                if (filterCriteria.getOperator() == SearchParameters.Operator.NEQ) {
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("NEQ operator found for string attribute {}, deferring
to in-memory or graph query (might cause poor performance)", qualifiedName);
+                    }
+
+                    ret = false;
+                } else if (hasIndexQuerySpecialChar(filterCriteria.getAttributeValue()))
{
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("special characters found in filter value {}, deferring
to in-memory or graph query (might cause poor performance)", filterCriteria.getAttributeValue());
+                    }
 
-                if (AtlasBaseTypeDef.ATLAS_TYPE_STRING.equals(attributeType.getTypeName()))
{
                     ret = false;
                 }
             }
         }
 
+        if (LOG.isDebugEnabled()) {
+            if (!ret) {
+                LOG.debug("Not using index query for: attribute='{}', operator='{}', value='{}'",
qualifiedName, filterCriteria.getOperator(), filterCriteria.getAttributeValue());
+            }
+        }
+
         return ret;
     }
 
@@ -358,7 +376,6 @@ public abstract class SearchProcessor {
                     if (nestedExpression.length() > 0) {
                         nestedExpression.append(SPACE_STRING).append(criteria.getCondition()).append(SPACE_STRING);
                     }
-                    // todo: when a neq operation is nested and occurs in the beginning of
the query, index query has issues
                     nestedExpression.append(nestedQuery);
                 }
             }
@@ -539,8 +556,7 @@ public abstract class SearchProcessor {
                             query.has(qualifiedName, AtlasGraphQuery.ComparisionOperator.NOT_EQUAL,
attrValue);
                             break;
                         case LIKE:
-                            // TODO: Maybe we need to validate pattern
-                            query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX,
getLikeRegex(attrValue));
+                            query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX,
attrValue);
                             break;
                         case CONTAINS:
                             query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX,
getContainsRegex(attrValue));
@@ -616,41 +632,88 @@ public abstract class SearchProcessor {
         }
     }
 
-    // ATLAS-2118: Reserved regex characters in attribute value can cause the graph query
to fail when parsing the contains regex
-    private String getContainsRegex(String attributeValue) {
-        StringBuilder escapedAttrVal = new StringBuilder(".*");
+    private static String getContainsRegex(String attributeValue) {
+        return ".*" + escapeRegExChars(attributeValue) + ".*";
+    }
 
-        for (int i = 0; i < attributeValue.length(); i++) {
-            final char c = attributeValue.charAt(i);
-
-            switch (c) {
-                case '+':
-                case '|':
-                case '(':
-                case '{':
-                case '[':
-                case '*':
-                case '?':
-                case '$':
-                case '/':
-                case '^':
-                    escapedAttrVal.append('\\');
-                    break;
+    private static String getSuffixRegex(String attributeValue) {
+        return ".*" + escapeRegExChars(attributeValue);
+    }
+
+    private static String escapeRegExChars(String val) {
+        StringBuilder escapedVal = new StringBuilder();
+
+        for (int i = 0; i < val.length(); i++) {
+            final char c = val.charAt(i);
+
+            if (isRegExSpecialChar(c)) {
+                escapedVal.append('\\');
             }
 
-            escapedAttrVal.append(c);
+            escapedVal.append(c);
         }
 
-        escapedAttrVal.append(".*");
+        return escapedVal.toString();
+    }
+
+    private static boolean isRegExSpecialChar(char c) {
+        switch (c) {
+            case '+':
+            case '|':
+            case '(':
+            case '{':
+            case '[':
+            case '*':
+            case '?':
+            case '$':
+            case '/':
+            case '^':
+                return true;
+        }
 
-        return escapedAttrVal.toString();
+        return false;
     }
 
-    private String getSuffixRegex(String attributeValue) {
-        return ".*" + attributeValue;
+    private static boolean hasIndexQuerySpecialChar(String attributeValue) {
+        for (int i = 0; i < attributeValue.length(); i++) {
+            if (isIndexQuerySpecialChar(attributeValue.charAt(i))) {
+                return true;
+            }
+        }
+
+        return false;
     }
 
-    private String getLikeRegex(String attributeValue) { return ".*" + attributeValue + ".*";
}
+    private static boolean isIndexQuerySpecialChar(char c) {
+        switch (c) {
+            case '+':
+            case '-':
+            case '&':
+            case '|':
+            case '!':
+            case '(':
+            case ')':
+            case '{':
+            case '}':
+            case '[':
+            case ']':
+            case '^':
+            case '"':
+            case '~':
+            case '*':
+            case '?':
+            case ':':
+            case '/':
+            case '#':
+            case '$':
+            case '%':
+            case '@':
+            case '=':
+                return true;
+        }
+
+        return false;
+    }
 
     protected List<AtlasVertex> getVerticesFromIndexQueryResult(Iterator<AtlasIndexQuery.Result>
idxQueryResult, List<AtlasVertex> vertices) {
         if (idxQueryResult != null) {


Mime
View raw message