Return-Path: X-Original-To: apmail-jena-commits-archive@www.apache.org Delivered-To: apmail-jena-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 647D0F86C for ; Fri, 19 Apr 2013 19:04:28 +0000 (UTC) Received: (qmail 21546 invoked by uid 500); 19 Apr 2013 19:04:28 -0000 Delivered-To: apmail-jena-commits-archive@jena.apache.org Received: (qmail 21502 invoked by uid 500); 19 Apr 2013 19:04:27 -0000 Mailing-List: contact commits-help@jena.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@jena.apache.org Delivered-To: mailing list commits@jena.apache.org Received: (qmail 21490 invoked by uid 99); 19 Apr 2013 19:04:27 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 19 Apr 2013 19:04:27 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 19 Apr 2013 19:04:26 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id E4DA3238899C; Fri, 19 Apr 2013 19:04:05 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1469989 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Date: Fri, 19 Apr 2013 19:04:05 -0000 To: commits@jena.apache.org From: rvesse@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130419190405.E4DA3238899C@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: rvesse Date: Fri Apr 19 19:04:01 2013 New Revision: 1469989 URL: http://svn.apache.org/r1469989 Log: Further work on JENA-441 Generalize the optimization to deal with ORDER BY + REDUCED as well as ORDER BY + DISTINCT Generalize the optimization to allow any sort conditions provided all mentioned variables exist in the projected variables Add additional test cases to validate Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java?rev=1469989&r1=1469988&r2=1469989&view=diff ============================================================================== --- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java (original) +++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformOrderByDistinctAppplication.java Fri Apr 19 19:04:01 2013 @@ -18,63 +18,93 @@ package com.hp.hpl.jena.sparql.algebra.optimize; +import java.util.List; + +import com.hp.hpl.jena.query.ARQ; import com.hp.hpl.jena.query.SortCondition; import com.hp.hpl.jena.sparql.algebra.Op; import com.hp.hpl.jena.sparql.algebra.TransformCopy; import com.hp.hpl.jena.sparql.algebra.op.OpDistinct; import com.hp.hpl.jena.sparql.algebra.op.OpOrder; import com.hp.hpl.jena.sparql.algebra.op.OpProject; +import com.hp.hpl.jena.sparql.algebra.op.OpReduced; +import com.hp.hpl.jena.sparql.core.Var; /** - * Prototype transformer motivated by JENA-441 *

- * This is a limited optimization that applies in the case where you have a query that meets the following conditions: + * Improved optimization for {@code ORDER BY} plus {@code DISTINCT} or + * {@code REDUCED} combinations, see JENA-441 for original proposal and + * discussion. + *

+ *

+ * This optimization is enabled by default as with most ARQ optimizations and + * may be disabled by setting the symbol + * {@link ARQ#optOrderByDistinctApplication} to false. + *

+ *

Optimization Applicability

+ *

+ * This is a limited optimization that applies in the case where you have a + * query that meets the following conditions: *

*
    - *
  • Uses both ORDER BY and DISTINCT on the same level of the query
  • - *
  • The list of order conditions is only simple variables
  • - *
  • There is a fixed list of simple variables to project that correspond precisely to the order conditions + *
  • Uses both {@code ORDER BY} and {@code DISTINCT} or {@code REDUCED} on the + * same level of the query
  • + *
  • There is a fixed list of variables to project i.e. not {@code SELECT *}
  • + *
  • {@code ORDER BY} conditions only use variables present in the project + * list
  • *
*

* Essentially this takes algebras of the following form: *

- * + * + *
  * (distinct 
  *   (project (?var) 
  *     (order (?var) 
  *       ... )))
- * 
+ * 
*

* And produces algebra of the following form: *

- * + * + *
  * (order (?var)
  *   (distinct 
  *     (project (?var) 
  *       ... )))
- * 
+ * 
+ *

+ * In the general case this in unsafe because it would change the semantics of + * the query since {@code ORDER BY} can access variables that are not projected. + * However if the conditions outlined are met then this optimization is safe, + * the algebras will be semantically equivalent and the resulting form likely + * significantly more performant, of course YMMV depending on how much data you + * are querying. + *

*/ public class TransformOrderByDistinctAppplication extends TransformCopy { @Override public Op transform(OpDistinct opDistinct, Op subOp) { - if (subOp instanceof OpProject) - { - OpProject project = (OpProject)subOp; - //At the project stage everything is a simple variable - //Inner operation must be an ORDER BY - if (project.getSubOp() instanceof OpOrder) - { - OpOrder order = (OpOrder)project.getSubOp(); - - //Everything we wish to order by must a simple variable and correspond exactly to the project list - //TODO: I think this can be generalized to allow any order conditions that use variables mentioned in the project list + if (subOp instanceof OpProject) { + OpProject project = (OpProject) subOp; + // At the project stage everything is a simple variable + // Inner operation must be an ORDER BY + if (project.getSubOp() instanceof OpOrder) { + List projectVars = project.getVars(); + OpOrder order = (OpOrder) project.getSubOp(); + + // Everything we wish to order by must only use variables that + // appear in the project list boolean ok = true; for (SortCondition condition : order.getConditions()) { - if (!condition.getExpression().isVariable()) ok = false; + if (!isValidSortCondition(condition, projectVars)) { + ok = false; + break; + } } - - //Everything checks out so we can make the change + + // Everything checks out so we can make the change if (ok) { OpProject newProject = new OpProject(order.getSubOp(), project.getVars()); OpDistinct newDistinct = new OpDistinct(newProject); @@ -82,9 +112,62 @@ public class TransformOrderByDistinctApp } } } - - //If we reach here then this transform is not applicable + + // If we reach here then this transform is not applicable return super.transform(opDistinct, subOp); } + @Override + public Op transform(OpReduced opReduced, Op subOp) { + if (subOp instanceof OpProject) { + OpProject project = (OpProject) subOp; + // At the project stage everything is a simple variable + // Inner operation must be an ORDER BY + if (project.getSubOp() instanceof OpOrder) { + List projectVars = project.getVars(); + OpOrder order = (OpOrder) project.getSubOp(); + + // Everything we wish to order by must only use variables that + // appear in the project list + boolean ok = true; + for (SortCondition condition : order.getConditions()) { + if (!isValidSortCondition(condition, projectVars)) { + ok = false; + break; + } + } + + // Everything checks out so we can make the change + if (ok) { + OpProject newProject = new OpProject(order.getSubOp(), project.getVars()); + Op newReduced = OpReduced.create(newProject); + return new OpOrder(newReduced, order.getConditions()); + } + } + } + + // If we reach here then this transform is not applicable + return super.transform(opReduced, subOp); + } + + /** + * Determines whether a sort condition is valid in terms of this optimizer + * + * @param cond + * Sort Condition + * @param projectVars + * Project Variables + * @return True if valid, false otherwise + */ + private boolean isValidSortCondition(SortCondition cond, List projectVars) { + if (cond.getExpression().isVariable()) { + return projectVars.contains(cond.getExpression().asVar()); + } else { + for (Var v : cond.getExpression().getVarsMentioned()) { + if (!projectVars.contains(v)) + return false; + } + return true; + } + } } Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1469989&r1=1469988&r2=1469989&view=diff ============================================================================== --- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original) +++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Fri Apr 19 19:04:01 2013 @@ -242,6 +242,79 @@ public class TestOptimizer extends BaseT check(queryString, opExpectedString) ; } + @Test public void distinct_order_by_application_04() + { + // The optimization still applies when order conditions are not simple variables + // provided every variable used in an expression appears in the project list + assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ; + String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(STR(?p))"; + String opExpectedString = + "(order ((lcase (str (?p))))\n" + + " (distinct\n" + + " (project (?p)\n" + + " (bgp (triple ?s ?p ?o)))))" ; + check(queryString, opExpectedString) ; + } + + @Test public void distinct_order_by_application_05() + { + // The optimization still applies when order conditions are not simple variables + // provided every variable used in an expression appears in the project list + assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ; + String queryString = "SELECT DISTINCT ?s ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))"; + String opExpectedString = + "(order ((lcase (concat ?s ?p)))\n" + + " (distinct\n" + + " (project (?s ?p)\n" + + " (bgp (triple ?s ?p ?o)))))" ; + check(queryString, opExpectedString) ; + } + + @Test public void distinct_order_by_application_06() + { + // The optimization can apply when order conditions are not simple variables + // provided every variable used in an expression appears in the project list + // In this case it should not apply because the condition used a variable that + // does not appear in the project list + assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ; + String queryString = "SELECT DISTINCT ?p { ?s ?p ?o } ORDER BY LCASE(CONCAT(?s, ?p))"; + String opExpectedString = + " (distinct\n" + + " (project (?p)\n" + + " (order ((lcase (concat ?s ?p)))\n" + + " (bgp (triple ?s ?p ?o)))))" ; + check(queryString, opExpectedString) ; + } + + @Test public void reduced_order_by_application_01() + { + assertTrue(ARQ.isTrueOrUndef(ARQ.optOrderByDistinctApplication)) ; + String queryString = "SELECT REDUCED ?p { ?s ?p ?o } ORDER BY ?p"; + String opExpectedString = + "(order (?p)\n" + + " (reduced\n" + + " (project (?p)\n" + + " (bgp (triple ?s ?p ?o)))))" ; + check(queryString, opExpectedString) ; + } + + @Test public void reduced_order_by_application_02() + { + try { + ARQ.setFalse(ARQ.optOrderByDistinctApplication) ; + assertTrue(ARQ.isFalse(ARQ.optOrderByDistinctApplication)) ; + String queryString = "SELECT REDUCED ?p { ?s ?p ?o } ORDER BY ?p"; + String opExpectedString = + "(reduced\n" + + " (project (?p)\n" + + " (order (?p)\n" + + " (bgp (triple ?s ?p ?o)))))" ; + check(queryString, opExpectedString) ; + } finally { + ARQ.unset(ARQ.optOrderByDistinctApplication); + } + } + @Test public void subQueryProject_01() { String qs = StrUtils.strjoinNL ( "SELECT *"