atlas-commits mailing list archives

From suma...@apache.org
Subject incubator-atlas git commit: ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)
Date Fri, 29 Apr 2016 04:30:22 GMT
Repository: incubator-atlas
Updated Branches:
  refs/heads/master 53574720c -> 574da752d


ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)


Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/574da752
Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/574da752
Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/574da752

Branch: refs/heads/master
Commit: 574da752d81a49c3200ef66e2b71c560f06b33c7
Parents: 5357472
Author: Suma Shivaprasad <sumasai.shivaprasad@gmail.com>
Authored: Thu Apr 28 21:30:11 2016 -0700
Committer: Suma Shivaprasad <sumasai.shivaprasad@gmail.com>
Committed: Thu Apr 28 21:30:11 2016 -0700

----------------------------------------------------------------------
 docs/src/site/twiki/Search.twiki                |  18 +-
 release-log.txt                                 |   1 +
 .../org/apache/atlas/query/Expressions.scala    |  30 ++
 .../apache/atlas/query/GremlinEvaluator.scala   |  23 +-
 .../org/apache/atlas/query/GremlinQuery.scala   |  70 +++-
 .../org/apache/atlas/query/QueryParser.scala    |  81 ++++-
 .../scala/org/apache/atlas/query/Resolver.scala |   8 +
 .../GraphBackedDiscoveryServiceTest.java        | 318 +++++++++++++++++++
 8 files changed, 513 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/docs/src/site/twiki/Search.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/Search.twiki b/docs/src/site/twiki/Search.twiki
index 3a1d00f..58c9238 100644
--- a/docs/src/site/twiki/Search.twiki
+++ b/docs/src/site/twiki/Search.twiki
@@ -13,7 +13,7 @@ queryWithPath: query ~ opt(WITHPATH)
 
 query: rep1sep(singleQuery, opt(COMMA))
 
-singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause)
+singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset)
 
 singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
         WHERE ~ (expr ^? notIdExpression) |
@@ -22,6 +22,14 @@ singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
 
 fromSrc: identifier ~ AS ~ alias | identifier
 
+orderby: ORDERBY ~ order ~ opt (sortOrder) 
+    
+limitOffset: LIMIT ~ lmt ~ opt (offset)
+    
+offset: OFFSET ~ offsetValue 
+    
+sortOrder = ASC | DESC
+    
 loopExpression: LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias)
 
 selectClause: SELECT ~ rep1sep(selectExpression, COMMA)
@@ -79,6 +87,9 @@ Language Notes:
   * The _!WithPath_ clause can be used with transitive closure queries to retrieve the Path that connects the two related Entities. (We also provide a higher level interface for Closure Queries; see scaladoc for 'org.apache.atlas.query.ClosureQuery'.)
+   * ORDERBY is optional. The attribute to sort on should be enclosed in single quotes ('). When an orderby clause is specified, sorting is case insensitive and ascending by default; specify DESC after the clause to sort in descending order. If no orderby clause is given, no default sorting is applied.
+   * LIMIT is optional. It caps the maximum number of objects returned, starting from an optional OFFSET. If no offset is specified, counting starts from the beginning.
    * There are a couple of Predicate functions different from SQL:
       * _is_ or _isa_ can be used to filter Entities that have a particular Trait.
       * _has_ can be used to filter Entities that have a value for a particular Attribute.
@@ -89,6 +100,11 @@ Language Notes:
 
    * from DB
    * DB where name="Reporting" select name, owner
+   * DB where name="Reporting" select name, owner orderby 'name'
+   * DB where name="Reporting" select name limit 10
+   * DB where name="Reporting" select name, owner limit 10 offset 0
+   * DB where name="Reporting" select name, owner orderby 'name' limit 10 offset 5
+   * DB where name="Reporting" select name, owner orderby 'name' desc limit 10 offset 5
    * DB has name
    * DB is !JdbcAccess
    * Column where Column isa PII

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 10cf434..3cf823f 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
 ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
 
 ALL CHANGES:
+ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)
 ATLAS-543 Entity Instance requests should not require ID element for new Entities (harishjp via shwethags)
 ATLAS-681 update committer/ppmc members in the pom.xml (sneethiraj via shwethags)
 ATLAS-616 Resolve OOM - Zookeeper throws exceptions when trying to fire DSL queries at Atlas at large scale. (yhemanth via sumasai)

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/Expressions.scala b/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
index a5dfa9f..2405750 100755
--- a/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
@@ -330,6 +330,10 @@ object Expressions {
         def instance() = new InstanceExpression(this)
 
         def path() = new PathExpression(this)
+        
+        def limit(lmt: Literal[Integer], offset : Literal[Integer]) = new LimitExpression(this, lmt, offset)
+        
+        def order(odr: String, asc: Boolean) = new OrderExpression(this, odr, asc)
     }
 
     trait BinaryNode {
@@ -766,4 +770,30 @@ object Expressions {
 
     override def toString = s"$child withPath"
   }
+
+  case class LimitExpression(child: Expression, limit: Literal[Integer], offset: Literal[Integer]) extends Expression with UnaryNode { 
+
+    override def toString = s"$child  limit $limit offset $offset "
+
+    lazy val dataType = {
+            if (!resolved) {
+                throw new UnresolvedException(this,
+                    s"datatype. Can not resolve due to unresolved children")
+            }
+            child.dataType
+    }
+  }
+  
+  case class OrderExpression(child: Expression, odr: String, asc: Boolean) extends Expression with UnaryNode { 
+
+    override def toString = s"$child  order $odr asc $asc"
+
+    lazy val dataType = {
+            if (!resolved) {
+                throw new UnresolvedException(this,
+                    s"datatype. Can not resolve due to unresolved children")
+            }
+            child.dataType
+    }
+  }
 }
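
A minimal sketch of how the new combinators compose with the existing fluent API on Expression (illustrative only, not part of the patch; it assumes the `_class` and `int` literal helpers that the query package already provides):

    import org.apache.atlas.query.Expressions._

    // order(attr, asc) wraps its receiver in an OrderExpression;
    // limit(lmt, offset) wraps it in a LimitExpression. This mirrors the
    // DSL query "hive_db orderby 'name' limit 10 offset 5".
    val e: Expression = _class("hive_db").order("name", true).limit(int(10), int(5))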

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala b/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
index edb190d..7de03c3 100755
--- a/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
@@ -19,7 +19,7 @@
 package org.apache.atlas.query
 
 import javax.script.{Bindings, ScriptEngine, ScriptEngineManager}
-
+import org.apache.atlas.query.Expressions._
 import com.thinkaurelius.titan.core.TitanGraph
 import com.tinkerpop.pipes.util.structures.Row
 import org.apache.atlas.query.TypeUtils.ResultWithPathStruct
@@ -27,8 +27,8 @@ import org.apache.atlas.typesystem.json._
 import org.apache.atlas.typesystem.types._
 import org.json4s._
 import org.json4s.native.Serialization._
-
 import scala.language.existentials
+import org.apache.atlas.query.Expressions._
 
 case class GremlinQueryResult(query: String,
                               resultDataType: IDataType[_],
@@ -82,7 +82,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
         val rType = qry.expr.dataType
         val oType = if (qry.isPathExpresion) qry.expr.children(0).dataType else rType
         val rawRes = engine.eval(qry.queryStr, bindings)
-
         if (!qry.hasSelectList) {
             val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { v =>
                 val iV = instanceObject(v)
@@ -95,14 +94,16 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
             val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { r =>
               val rV = instanceObject(r).asInstanceOf[Row[java.util.List[AnyRef]]]
                 val sInstance = sType.createInstance()
-                val selExpr =
-                  (if (qry.isPathExpresion) qry.expr.children(0) else qry.expr).
-                    asInstanceOf[Expressions.SelectExpression]
-                selExpr.selectListWithAlias.foreach { aE =>
-                    val cName = aE.alias
-                    val (src, idx) = qry.resultMaping(cName)
-                    val v = rV.getColumn(src).get(idx)
-                    sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
+                val selObj = SelectExpressionHelper.extractSelectExpression(qry.expr)
+                if (selObj.isDefined)
+                {
+                    val selExpr = selObj.get.asInstanceOf[Expressions.SelectExpression]
+                        selExpr.selectListWithAlias.foreach { aE =>
+                        val cName = aE.alias
+                        val (src, idx) = qry.resultMaping(cName)
+                        val v = rV.getColumn(src).get(idx)
+                        sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
+                    }
                 }
               addPathStruct(r, sInstance)
             }
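
The unwrapping above is needed because, with orderby/limit in the grammar, the expression handed to the evaluator is no longer guaranteed to be a SelectExpression (or a PathExpression directly over one). A sketch of how SelectExpressionHelper (defined in GremlinQuery.scala below) behaves, with illustrative tree shapes:

    // sel stands for an arbitrary SelectExpression; lmt/off for Literal[Integer]s.
    // Some(sel) is returned when a select sits under Limit/Order/Path wrappers:
    //   extractSelectExpression(PathExpression(sel))            == Some(sel)
    //   extractSelectExpression(LimitExpression(sel, lmt, off)) == Some(sel)
    // None when the query has no select clause at all:
    //   extractSelectExpression(ClassExpression("hive_db"))     == None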

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala b/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
index f1590a8..73981c0 100755
--- a/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
@@ -35,6 +35,7 @@ case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[St
     def hasSelectList = resultMaping != null
 
     def isPathExpresion = expr.isInstanceOf[PathExpression]
+    
 }
 
 trait SelectExpressionHandling {
@@ -122,7 +123,6 @@ trait SelectExpressionHandling {
         }
         m.toMap
     }
-
 }
 
 class GremlinTranslationException(expr: Expression, reason: String) extends
@@ -186,7 +186,8 @@ class GremlinTranslator(expr: Expression,
     }
 
     def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = {
-      case te : TraitExpression if (te fastEquals topE) =>  {
+//          This topE check prevented the comparison of the trait expression when it is a child, e.g. "trait as t limit 2".
+        case te : TraitExpression =>  {
         val theTrait = te.as("theTrait")
         val theInstance = theTrait.traitInstance().as("theInstance")
         val outE =
@@ -202,6 +203,7 @@ class GremlinTranslator(expr: Expression,
         stats.last
     }
 
+
     private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
         case ClassExpression(clsName) =>
             typeTestExpression(clsName)
@@ -324,12 +326,26 @@ class GremlinTranslator(expr: Expression,
         case pe@PathExpression(child) => {
           s"${genQuery(child, inSelect)}.path"
         }
+        case order@OrderExpression(child, odr, asc) => {
+          var orderby = ""
+             asc  match {
+            //builds a closure comparison function based on the order by clause in the DSL; gremlin's order pipe uses it to sort the results.
+            //Ordering is case insensitive.
+              case false=> orderby = s"order{it.b.getProperty('$odr').toLowerCase() <=> it.a.getProperty('$odr').toLowerCase()}"//descending
+              case _ => orderby = s"order{it.a.getProperty('$odr').toLowerCase() <=> it.b.getProperty('$odr').toLowerCase()}"
+              
+            }
+          s"""${genQuery(child, inSelect)}.$orderby"""
+        }
+        case limitOffset@LimitExpression(child, limit, offset) => {
+          val totalResultRows = limit.value + offset.value
+          s"""${genQuery(child, inSelect)} [$offset..<$totalResultRows]"""
+        }
         case x => throw new GremlinTranslationException(x, "expression not yet supported")
     }
 
     def genFullQuery(expr: Expression): String = {
         var q = genQuery(expr, false)
-
         if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) {
             q = s"g.V.$q"
         }
@@ -354,21 +370,48 @@ class GremlinTranslator(expr: Expression,
         e1 = e1.transformUp(addAliasToLoopInput())
         e1 = e1.transformUp(instanceClauseToTop(e1))
         e1 = e1.transformUp(traitClauseWithInstanceForTop(e1))
-
-        e1 match {
-            case e1: SelectExpression => {
-                val rMap = buildResultMapping(e1)
+        
+        //The following code extracts the select expression from the expression tree.
+        
+             val  se = SelectExpressionHelper.extractSelectExpression(e1)
+             if (se.isDefined)
+             {
+                val  rMap = buildResultMapping(se.get)
                 GremlinQuery(e1, genFullQuery(e1), rMap)
-            }
-            case pe@PathExpression(se@SelectExpression(child, selectList)) => {
-              val rMap = buildResultMapping(se)
-              GremlinQuery(e1, genFullQuery(e1), rMap)
-            }
-            case e1 => GremlinQuery(e1, genFullQuery(e1), null)
+             }
+             else
+             {
+                GremlinQuery(e1, genFullQuery(e1), null)
+             }
         }
 
     }
 
+    object SelectExpressionHelper {
+       /**
+     * This method extracts the child select expression from the parent expression.
+     */
+     def extractSelectExpression(child: Expression): Option[SelectExpression] = {
+      child match {
+            case se@SelectExpression(child, selectList) =>{
+              Some(se)
+            }
+            case limit@LimitExpression(child, lmt, offset) => {
+              extractSelectExpression(child)
+            }
+            case order@OrderExpression(child, odr, odrBy) => {
+              extractSelectExpression(child)
+            }
+           case path@PathExpression(child) => {
+              extractSelectExpression(child)
+           }
+           case _ => {
+             None
+           }
+           
+      }
+    }
+    }
     /*
      * TODO
      * Translation Issues:
@@ -379,4 +422,3 @@ class GremlinTranslator(expr: Expression,
     * The solution is to do predicate pushdown and apply the filter immediately on top of the referred Expression.
      */
 
-}
\ No newline at end of file
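
To make the two new genQuery cases concrete, a sketch of the Gremlin fragments they emit (illustrative; <src> stands for whatever the child expression translates to):

    // orderby 'name'       -> <src>.order{it.a.getProperty('name').toLowerCase() <=> it.b.getProperty('name').toLowerCase()}
    // orderby 'name' desc  -> <src>.order{it.b.getProperty('name').toLowerCase() <=> it.a.getProperty('name').toLowerCase()}
    // limit 2 offset 1     -> <src> [1..<3]   // i.e. the range [offset ..< limit + offset]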

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala b/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
index b6bbbd3..60b57d9 100755
--- a/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
@@ -64,10 +64,14 @@ trait QueryKeywords {
     protected val AS = Keyword("as")
     protected val TIMES = Keyword("times")
     protected val WITHPATH = Keyword("withPath")
+    protected val LIMIT = Keyword("limit")
+    protected val OFFSET = Keyword("offset")
+    protected val ORDERBY = Keyword("orderby")
 }
 
 trait ExpressionUtils {
 
+    protected val DESC = "desc"
    def loop(input: Expression, l: (Expression, Option[Literal[Integer]], Option[String])) = l match {
         case (c, None, None) => input.loop(c)
         case (c, t, None) => input.loop(c, t.get)
@@ -85,6 +89,14 @@ trait ExpressionUtils {
         input.select(selList: _*)
     }
 
+    def limit(input: Expression, lmt: Literal[Integer], offset: Literal[Integer]) = {
+        input.limit(lmt, offset) 
+    }
+    
+    def order(input: Expression, odr: String, asc: Boolean) = {
+        input.order(odr, asc)
+    }
+        
     def leftmostId(e: Expression) = {
         var le: IdExpression = null
         e.traverseUp { case i: IdExpression if le == null => le = i}
@@ -106,7 +118,10 @@ trait ExpressionUtils {
         sngQuery2.transformUp(replaceIdWithField(leftSrcId, snglQuery1.field(leftSrcId.name)))
     }
 }
-
+/**
+ * The query parser parses the DSL query. It uses Scala PackratParsers and pattern matching to extract the expressions.
+ * It builds up an expression tree.
+ */
 object QueryParser extends StandardTokenParsers with QueryKeywords with ExpressionUtils with PackratParsers {
 
     import scala.language.higherKinds
@@ -137,12 +152,33 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
         case h :: t => t.foldLeft(h)(merge(_, _))
     }
     }
-
-    def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ^^ {
-        case s ~ None ~ None => s
-        case s ~ l ~ None => loop(s, l.get)
-        case s ~ l ~ sel if l.isDefined => select(loop(s, l.get), sel.get)
-        case s ~ None ~ sel => select(s, sel.get)
+    /**
+     * A singleQuery can have the following form:
+     * 1. SrcQuery [select] [orderby desc] [limit x offset y] -> a source query followed by an optional select clause, an optional orderby clause and an optional limit/offset clause
+     * e.g. "hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1"
+     * @return
+     */
+    def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset) ^^ {
+      case s ~ l ~ sel ~ odr ~ lmtoff => {
+        var expressiontree = s
+        if (l.isDefined) //Note: The order of if statements is important. 
+        {
+          expressiontree = loop(expressiontree, l.get);
+        }
+        if (odr.isDefined)
+        {
+          expressiontree = order(expressiontree, odr.get._1, odr.get._2)
+        }
+        if (lmtoff.isDefined)
+        {
+          expressiontree = limit(expressiontree, int (lmtoff.get._1), int (lmtoff.get._2))
+        }
+        if (sel.isDefined)
+        {
+          expressiontree = select(expressiontree, sel.get)
+        }
+        expressiontree
+      }
     }
 
     /**
@@ -182,7 +218,25 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
     def fromSrc = identifier ~ AS ~ alias ^^ { case s ~ a ~ al => s.as(al)} |
         identifier
 
-
+    def orderby = ORDERBY ~ order ~ opt (asce) ^^ {
+      case o ~ odr ~ None => (odr, true)
+      case o ~ odr ~ asc => (odr, asc.get)
+    }
+    
+    def limitOffset = LIMIT ~ lmt ~ opt (offset) ^^ {
+      case l ~ lt ~ None => (lt, 0)
+      case l ~ lt ~ of => (lt, of.get)
+    }
+    
+    def offset = OFFSET ~ ofset  ^^ {
+      case offset ~ of  => of
+    }
+    
+    def asce = asc ^^ {
+      case DESC  => false
+      case  _ => true
+    }
+    
     def loopExpression: Parser[(Expression, Option[Literal[Integer]], Option[String])] =
        LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) ^^ {
             case l ~ e ~ None ~ a => (e, None, a)
@@ -192,7 +246,6 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
    def selectClause: Parser[List[(Expression, Option[String])]] = SELECT ~ rep1sep(selectExpression, COMMA) ^^ {
         case s ~ cs => cs
     }
-
    def selectExpression: Parser[(Expression, Option[String])] = expr ~ opt(AS ~> alias) ^^ {
         case e ~ a => (e, a)
     }
@@ -239,7 +292,15 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
     }
 
     def alias = ident | stringLit
-
+    
+    def lmt = intConstant
+    
+    def ofset = intConstant
+
+    def order = ident | stringLit
+    
+    def asc =  ident | stringLit
+    
     def literal = booleanConstant ^^ {
         boolean(_)
         } |
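
A minimal sketch of the new productions end to end (illustrative; it assumes QueryParser's existing apply(input: String) entry point, which yields either a parse failure or the parsed Expression):

    // Because select is applied last in singleQuery, the tree nests as
    // Select(Limit(Order(src))) when all three clauses are present.
    QueryParser("hive_db has name orderby 'hive_db.owner' desc limit 2 offset 1") match {
        case Right(expr) => println(expr) // contains an OrderExpression with asc = false and a LimitExpression with limit 2, offset 1
        case Left(err)   => println(err)
    }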

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/Resolver.scala b/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
index c7e1e81..5fc9400 100755
--- a/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
@@ -90,6 +90,14 @@ class Resolver(srcExpr: Option[Expression] = None, aliases: Map[String, Expressi
         case l@LoopExpression(inputExpr, loopExpr, t) if inputExpr.resolved => {
             val r = new Resolver(Some(inputExpr), inputExpr.namedExpressions, true)
             return new LoopExpression(inputExpr, loopExpr.transformUp(r), t)
+            }
+        case lmt@LimitExpression(child, limit, offset) => {
+            val r = new Resolver(Some(child), child.namedExpressions)
+            return new LimitExpression(child.transformUp(r), limit, offset)
+        }
+        case order@OrderExpression(child, odr, asc) => {
+            val r = new Resolver(Some(child), child.namedExpressions)
+            return new OrderExpression(child.transformUp(r), odr, asc)
         }
         case x => x
     }

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
----------------------------------------------------------------------
diff --git a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
index 06ac492..9cc501f 100755
--- a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
+++ b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
@@ -19,6 +19,7 @@
 package org.apache.atlas.discovery;
 
 import com.google.common.collect.ImmutableSet;
+
 import org.apache.atlas.BaseHiveRepositoryTest;
 import org.apache.atlas.RepositoryMetadataModule;
 import org.apache.atlas.RequestContext;
@@ -45,8 +46,10 @@ import org.testng.annotations.Guice;
 import org.testng.annotations.Test;
 
 import javax.inject.Inject;
+
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -248,6 +251,297 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
         };
     }
 
+    @DataProvider(name = "dslLimitQueriesProvider")
+    private Object[][] createDSLQueriesWithLimit() {
+        return new Object[][]{
+                {"hive_column  limit 10 ", 10},
+                {"hive_column select hive_column.name limit 10 ", 10},
+                {"hive_column select hive_column.name  withPath", 29},
+                {"hive_column select hive_column.name limit 10 withPath", 10},
+                {"from hive_db", 3},
+                {"from hive_db limit 2", 2},
+                {"from hive_db limit 2 offset 0", 2},
+                {"from hive_db limit 2 offset 1", 2},
+                {"from hive_db limit 3 offset 1", 2},
+                {"hive_db", 3},
+                {"hive_db where hive_db.name=\"Reporting\"", 1},
+                {"hive_db where hive_db.name=\"Reporting\" limit 10 ", 1},
+                {"hive_db hive_db.name = \"Reporting\"", 1},
+                {"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
+                {"hive_db has name", 3},
+                {"hive_db has name limit 2 offset 0", 2},
+                {"hive_db has name limit 2 offset 1", 2},
+                {"hive_db has name limit 10 offset 1", 2},
+                {"hive_db has name limit 10 offset 0", 3},
+                {"hive_db, hive_table", 8},
+                {"hive_db, hive_table limit 5", 5},
+                {"hive_db, hive_table limit 5 offset 0", 5},
+                {"hive_db, hive_table limit 5 offset 5", 3},
+                
+                {"View is JdbcAccess", 2},
+                {"View is JdbcAccess limit 1", 1},
+                {"View is JdbcAccess limit 2 offset 1", 1},
+                {"hive_db as db1, hive_table where db1.name = \"Reporting\"", 0}, //Not working - ATLAS-145
+                
+                
+                {"from hive_table", 8},
+                {"from hive_table limit 5", 5},
+                {"from hive_table limit 5 offset 5", 3},
+                
+                {"hive_table", 8},
+                {"hive_table limit 5", 5},
+                {"hive_table limit 5 offset 5", 3},
+                
+                {"hive_table isa Dimension", 3},
+                {"hive_table isa Dimension limit 2", 2},
+                {"hive_table isa Dimension limit 2 offset 0", 2},
+                {"hive_table isa Dimension limit 2 offset 1", 2},
+                {"hive_table isa Dimension limit 3 offset 1", 2},
+                
+                {"hive_column where hive_column isa PII", 6},
+                {"hive_column where hive_column isa PII limit 5", 5},
+                {"hive_column where hive_column isa PII limit 5 offset 1", 5},
+                {"hive_column where hive_column isa PII limit 5 offset 5", 1},
+                
+                
+                {"View is Dimension" , 2},
+                {"View is Dimension limit 1" , 1},
+                {"View is Dimension limit 1 offset 1" , 1},
+                {"View is Dimension limit 10 offset 1" , 1},
+                
+                {"hive_column select hive_column.name", 29},
+                {"hive_column select hive_column.name limit 5", 5},
+                {"hive_column select hive_column.name limit 5 offset 28", 1},
+                
+                {"hive_column select name", 29},
+                {"hive_column select name limit 5", 5},
+                {"hive_column select name limit 5 offset 28 ", 1},
+                
+                {"hive_column where hive_column.name=\"customer_id\"", 4},
+                {"hive_column where hive_column.name=\"customer_id\" limit 2", 2},
+                {"hive_column where hive_column.name=\"customer_id\" limit 2 offset 1", 2},
+                {"hive_column where hive_column.name=\"customer_id\" limit 10 offset 3", 1},
+                
+                {"from hive_table select hive_table.name", 8},
+                {"from hive_table select hive_table.name limit 5", 5},
+                {"from hive_table select hive_table.name limit 5 offset 5", 3},
+                
+                {"hive_db where (name = \"Reporting\")", 1},
+                {"hive_db where (name = \"Reporting\") limit 10", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 limit 10", 1},
+                {"hive_db where hive_db is JdbcAccess", 0}, //Not supposed to work
+                {"hive_db hive_table", 8},
+                {"hive_db hive_table limit 5", 5},
+                {"hive_db hive_table limit 5 offset 5", 3},
+                {"hive_db where hive_db has name", 3},
+                {"hive_db where hive_db has name limit 5", 3},
+                {"hive_db where hive_db has name limit 2 offset 0", 2},
+                {"hive_db where hive_db has name limit 2 offset 1", 2},
+                
+                {"hive_db as db1 hive_table where (db1.name = \"Reporting\")", 0}, //Not working -> ATLAS-145
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 1", 0},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 0", 1},
+                
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 ", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 5", 0},
+                
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 ", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 1", 0},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 0 offset 1", 0},
+
+                // trait searches
+                {"Dimension", 5},
+                {"Dimension limit 2", 2},
+                {"Dimension limit 2 offset 1", 2},
+                {"Dimension limit 5 offset 4", 1},
+                
+                {"JdbcAccess", 2},
+                {"JdbcAccess limit 5 offset 0", 2},
+                {"JdbcAccess limit 2 offset 1", 1},
+                {"JdbcAccess limit 1", 1},
+                
+                {"ETL", 3},
+                {"ETL limit 2", 2},
+                {"ETL limit 1", 1},
+                {"ETL limit 1 offset 0", 1},
+                {"ETL limit 2 offset 1", 2},
+                
+                {"Metric", 5},
+                {"Metric limit 10", 5},
+                {"Metric limit 2", 2},
+                {"Metric limit 10 offset 1", 4},
+                
+                
+                
+                {"PII", 6},
+                {"PII limit 10", 6},
+                {"PII limit 2", 2},
+                {"PII limit 10 offset 1", 5},
+                
+                {"`Log Data`", 4},
+                {"`Log Data` limit 3", 3},
+                {"`Log Data` limit 10 offset 2", 2},
+
+
+                {"hive_table where name='sales_fact', db where name='Sales'", 1},
+                {"hive_table where name='sales_fact', db where name='Sales' limit 10", 1},
+                {"hive_table where name='sales_fact', db where name='Sales' limit 10 offset 1", 0},
+                {"hive_table where name='sales_fact', db where name='Reporting'", 0},
+                {"hive_table where name='sales_fact', db where name='Reporting' limit 10", 0},
+                {"hive_table where name='sales_fact', db where name='Reporting' limit 10 offset 1", 0},
+                {"hive_partition as p where values = ['2015-01-01']", 1},
+                {"hive_partition as p where values = ['2015-01-01'] limit 10", 1},
+                {"hive_partition as p where values = ['2015-01-01'] limit 10 offset 1", 0},
+                
+        };
+    }
+    
+    @DataProvider(name = "dslOrderByQueriesProvider")
+    private Object[][] createDSLQueriesWithOrderBy() {
+        Boolean isAscending = Boolean.TRUE;
+        return new Object[][]{
+                //Not working: the server code does not figure out how to sort in this case; not sure it is a valid use case.
+//                {"hive_db  hive_table  orderby 'hive_db.owner'", 8, "owner", isAscending},
+//                {"hive_db hive_table orderby 'hive_db.owner' limit 5", 5, "owner", isAscending},
+//                {"hive_db hive_table orderby 'hive_db.owner' limit 5 offset 5", 3, "owner", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' limit 10 withPath", 10, "name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' asc limit 10 withPath", 10, "name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' desc limit 10 withPath", 10, "name", !isAscending},
+                {"from hive_db orderby 'hive_db.owner' limit 3", 3, "owner", isAscending},
+                {"hive_db where hive_db.name=\"Reporting\" orderby 'owner'", 1, "owner", isAscending},
+                
+                {"hive_db where hive_db.name=\"Reporting\" orderby 'hive_db.owner' limit 10 ", 1, "owner", isAscending},
+                {"hive_db where hive_db.name=\"Reporting\" select name, owner orderby 'hive_db.name' ", 1, "name", isAscending},
+                {"hive_db has name orderby 'hive_db.owner' limit 10 offset 0", 3, "owner", isAscending},
+                
+                {"from hive_table orderby 'hive_table.owner'", 8, "owner", isAscending},
+                {"from hive_table orderby 'hive_table.owner' limit 8", 8, "owner", isAscending},
+                
+                {"hive_table orderby 'hive_table.owner'", 8, "owner", isAscending},
+                {"hive_table orderby 'hive_table.owner' limit 8", 8, "owner", isAscending},
+                {"hive_table orderby 'hive_table.owner' limit 8 offset 0", 8, "owner", isAscending},
+                {"hive_table orderby 'hive_table.owner' desc limit 8 offset 0", 8, "owner", !isAscending},
+                
+                {"hive_table isa Dimension orderby 'hive_table.owner'", 3, "owner", isAscending},//order not working
+                {"hive_table isa Dimension orderby 'hive_table.owner' limit 3", 3, "owner", isAscending},
+                {"hive_table isa Dimension orderby 'hive_table.owner' limit 3 offset 0", 3, "owner", isAscending},
+                {"hive_table isa Dimension orderby 'hive_table.owner' desc limit 3 offset 0", 3, "owner", !isAscending},
+                
+                {"hive_column where hive_column isa PII orderby 'hive_column.name'", 6, "name", isAscending},
+                {"hive_column where hive_column isa PII orderby 'hive_column.name' limit 5", 5, "name", isAscending},
+                {"hive_column where hive_column isa PII orderby 'hive_column.name' limit 5 offset 1", 5, "name", isAscending},
+                {"hive_column where hive_column isa PII orderby 'hive_column.name' desc limit 5 offset 1", 5, "name", !isAscending},
+                
+                
+                {"hive_column select hive_column.name orderby 'hive_column.name' ", 29, "hive_column.name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' limit 5", 5, "hive_column.name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' desc limit 5", 5, "hive_column.name", !isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' limit 5 offset 28", 1, "hive_column.name", isAscending},
+                
+                {"hive_column select name orderby 'hive_column.name'", 29, "name", isAscending},
+                {"hive_column select name orderby 'hive_column.name' limit 5", 5, "name", isAscending},
+                {"hive_column select name orderby 'hive_column.name' desc", 29, "name", !isAscending},
+                
+                {"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name'", 4, "name", isAscending},
+                {"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name' limit 2", 2, "name", isAscending},
+                {"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name' limit 2 offset 1", 2, "name", isAscending},
+                
+                {"from hive_table select owner orderby 'hive_table.owner'", 8, "owner", isAscending},
+                {"from hive_table select owner orderby 'hive_table.owner' limit 5", 5, "owner", isAscending},
+                {"from hive_table select owner orderby 'hive_table.owner' desc limit 5", 5, "owner", !isAscending},
+                {"from hive_table select owner orderby 'hive_table.owner' limit 5 offset 5", 3, "owner", isAscending},
+                
+                {"hive_db where (name = \"Reporting\") orderby 'hive_db.name'", 1, "name", isAscending},
+                {"hive_db where (name = \"Reporting\") orderby 'hive_db.name' limit 10", 1, "name", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 orderby '_col_1'", 1, "_col_1", isAscending}, //will it work
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner'", 3, "owner", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner' limit 5", 3, "owner", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 0", 2, "owner", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1", 2, "owner", isAscending},
+                
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1'", 1, "_col_1", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 1", 0, "_col_1", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
+                
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' ", 1, "_col_1", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 ", 1, "_col_1", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 5", 0, "_col_1", isAscending},
+                
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' ", 1, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 0", 1, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 1", 0, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10", 1, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 0 offset 1", 0, "_col_0", isAscending},
+
+        };
+    }
+    
+    @Test(dataProvider = "dslOrderByQueriesProvider")
+    public void  testSearchByDSLQueriesWithOrderBy(String dslQuery, Integer expectedNumRows, String orderBy, boolean ascending) throws Exception {
+        System.out.println("Executing dslQuery = " + dslQuery);
+        String jsonResults = discoveryService.searchByDSL(dslQuery);
+        Assert.assertNotNull(jsonResults);
+
+        JSONObject results = new JSONObject(jsonResults);
+        Assert.assertEquals(results.length(), 3);
+
+        Object query = results.get("query");
+        Assert.assertNotNull(query);
+
+        JSONObject dataType = results.getJSONObject("dataType");
+        Assert.assertNotNull(dataType);
+        String typeName = dataType.getString("typeName");
+        Assert.assertNotNull(typeName);
+
+        JSONArray rows = results.getJSONArray("rows");
+       
+        Assert.assertNotNull(rows);
+        Assert.assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results
+        List<String> returnedList = new ArrayList<String> ();
+        for (int i = 0; i < rows.length(); i++) {
+            JSONObject row = rows.getJSONObject(i);
+            try
+            {
+                returnedList.add(row.get(orderBy).toString());
+            }
+            catch(Exception ex)
+            {
+                System.out.println(" Exception occurred " + ex.getMessage());
+            }
+        }
+        Iterator<String> iter = returnedList.iterator();
+        String _current = null, _prev = null;
+        //Following code compares the results in rows and makes sure data is sorted as expected.
+        while(iter.hasNext())
+        {
+            _prev = _current;
+            _current = iter.next().toLowerCase();
+            if (_prev != null && _prev.compareTo(_current) != 0)
+            {
+                if(ascending)
+                {
+                    Assert.assertTrue(_prev.compareTo(_current) < 0);
+                }
+                else
+                {
+                    Assert.assertTrue(_prev.compareTo(_current) > 0);
+                }
+            }
+        }
+        
+        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
+    }
+    
     @Test(dataProvider = "dslQueriesProvider")
    public void  testSearchByDSLQueries(String dslQuery, Integer expectedNumRows) throws Exception {
         System.out.println("Executing dslQuery = " + dslQuery);
@@ -272,6 +566,30 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
         System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
     }
 
+    @Test(dataProvider = "dslLimitQueriesProvider")
+    public void  testSearchByDSLQueriesWithLimit(String dslQuery, Integer expectedNumRows) throws Exception {
+        System.out.println("Executing dslQuery = " + dslQuery);
+        String jsonResults = discoveryService.searchByDSL(dslQuery);
+        Assert.assertNotNull(jsonResults);
+
+        JSONObject results = new JSONObject(jsonResults);
+        Assert.assertEquals(results.length(), 3);
+        System.out.println("results = " + results);
+
+        Object query = results.get("query");
+        Assert.assertNotNull(query);
+
+        JSONObject dataType = results.getJSONObject("dataType");
+        Assert.assertNotNull(dataType);
+        String typeName = dataType.getString("typeName");
+        Assert.assertNotNull(typeName);
+
+        JSONArray rows = results.getJSONArray("rows");
+        Assert.assertNotNull(rows);
+        Assert.assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results
+        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
+    }
+    
     @DataProvider(name = "invalidDslQueriesProvider")
     private Object[][] createInvalidDSLQueries() {
         return new String[][]{{"from Unknown"}, {"Unknown"}, {"Unknown is Blah"},};


