jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rve...@apache.org
Subject svn commit: r1496169 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/algebra/optimize/ test/java/com/hp/hpl/jena/sparql/algebra/ test/java/com/hp/hpl/jena/sparql/algebra/optimize/
Date Mon, 24 Jun 2013 19:19:16 GMT
Author: rvesse
Date: Mon Jun 24 19:19:16 2013
New Revision: 1496169

URL: http://svn.apache.org/r1496169
Log:
More work on implicit join optimization (JENA-473)

Add additional safety checks for overlapping implicit join constraints, add additional tests
to validate
Enabled implicit join optimization by default

Also fixes a bug I spotted in TransformFilterEquality where overlapping constraints would
lead to semantically incorrect algebra.  Fixed and test case added for this

Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterEquality.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/TestClassify.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java?rev=1496169&r1=1496168&r2=1496169&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java
(original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/Optimize.java
Mon Jun 24 19:19:16 2013
@@ -173,10 +173,10 @@ public class Optimize implements Rewrite
         
         // Currently implicit join optimizations must be explicitly enabled
         
-        if ( context.isTrue(ARQ.optFilterImplicitJoin) )
+        if ( context.isTrueOrUndef(ARQ.optFilterImplicitJoin) )
             op = apply("Filter Implicit Join", new TransformFilterImplicitJoin(), op);
         
-        if ( context.isTrue(ARQ.optImplicitLeftJoin) )
+        if ( context.isTrueOrUndef(ARQ.optImplicitLeftJoin) )
             op = apply("Implicit Left Join", new TransformImplicitLeftJoin(), op);
         
         if ( context.isTrueOrUndef(ARQ.optFilterEquality) )

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterEquality.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterEquality.java?rev=1496169&r1=1496168&r2=1496169&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterEquality.java
(original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterEquality.java
Mon Jun 24 19:19:16 2013
@@ -22,6 +22,7 @@ import static org.apache.jena.atlas.lib.
 
 import java.util.ArrayList ;
 import java.util.Collection ;
+import java.util.HashSet;
 import java.util.List ;
 import java.util.Set ;
 
@@ -65,6 +66,11 @@ public class TransformFilterEquality ext
         Collection<Var> varsMentioned = varsMentionedInEqualityFilters(equalities)
;
         ExprList remaining = p.getRight() ;
         
+        // If any of the conditions overlap the optimization is unsafe 
+        // (the query is also likely incorrect but that isn't our problem)
+        if (varsMentioned.size() < equalities.size())
+            return null;
+        
         // ---- Check if the subOp is the right shape to transform.
         Op op = subOp ;
         
@@ -168,7 +174,7 @@ public class TransformFilterEquality ext
 
     private static Collection<Var> varsMentionedInEqualityFilters(List<Pair<Var,
NodeValue>> equalities)
     {
-        List<Var> vars = new ArrayList<Var>() ;
+        Set<Var> vars = new HashSet<Var>() ;
         for ( Pair<Var, NodeValue> p : equalities )
             vars.add(p.getLeft()) ;
         return vars ;

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java?rev=1496169&r1=1496168&r2=1496169&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java
(original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformFilterImplicitJoin.java
Mon Jun 24 19:19:16 2013
@@ -70,7 +70,8 @@ import com.hp.hpl.jena.sparql.expr.*;
  * }
  * </pre>
  * <p>
- * The optimizer does not cover the implicit left join case, for that see {@link TransformImplicitLeftJoin}
+ * The optimizer does not cover the implicit left join case, for that see
+ * {@link TransformImplicitLeftJoin}
  * </p>
  * <h3>Applicability</h3>
  * <p>
@@ -84,7 +85,8 @@ import com.hp.hpl.jena.sparql.expr.*;
  * </p>
  * <h3>Known Limitations/To Do</h3>
  * <ul>
- * <li>Application to implicit joins may block the sequence transform which means the
potential benefits of the optimization are negated</li>
+ * <li>Application to implicit joins may block the sequence transform which
+ * means the potential benefits of the optimization are negated</li>
  * </ul>
  */
 public class TransformFilterImplicitJoin extends TransformCopy {
@@ -107,6 +109,16 @@ public class TransformFilterImplicitJoin
         Collection<Var> varsMentioned = varsMentionedInImplictJoins(joins);
         ExprList remaining = p.getRight();
 
+        // Not possible to optimize if multiple overlapping implicit joins
+        // We can test this simply by checking that the number of vars in
+        // varsMentioned is double the number of detected implicit joins
+        
+        // TODO In principal this may be safe provided we carefully apply the
+        // substitutions in the correct order, this is left as a future
+        // enhancement to this optimizer
+        if (varsMentioned.size() != joins.size() * 2)
+            return null;
+
         // ---- Check if the subOp is the right shape to transform.
         Op op = subOp;
 

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java?rev=1496169&r1=1496168&r2=1496169&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
(original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
Mon Jun 24 19:19:16 2013
@@ -40,7 +40,7 @@ public class TransformTopN extends Trans
     @Override
 	public Op transform(OpSlice opSlice, Op subOp) { 
         /* 
-         * This looks for all the follwoing cases of slice with optionally 
+         * This looks for all the following cases of slice with optionally 
          * distinct and/or project follow by order
          * 
          *  + slice order
@@ -78,7 +78,7 @@ public class TransformTopN extends Trans
          * The project can also be over the slice.    
          *
          * The case of (slice (distinct (project (vars) (order ...))))
-         * does not work because distinct-project menas we do not know how
+         * does not work because distinct-project means we do not know how
          * but to make topN buffer.
          * 
          * When there is no project, we can push the distinct under the topN,

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/TestClassify.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/TestClassify.java?rev=1496169&r1=1496168&r2=1496169&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/TestClassify.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/TestClassify.java Mon
Jun 24 19:19:16 2013
@@ -176,24 +176,34 @@ public class TestClassify extends TestCa
     { classifyLJ("{ ?s ?p ?x OPTIONAL { ?s1 ?p2 ?x . BIND(?x AS ?test) } }", true)  ; }
         
     /**
-     * Can't linearize with BIND present provided if any mentioned vars are not on RHS
+     * Can't linearize with BIND present if any mentioned vars are not on RHS
      */
     @Test public void testClassify_LeftJoin_12()
     { classifyLJ("{ ?s ?p ?x OPTIONAL { ?s1 ?p2 ?x . BIND(?s AS ?test) } }", false)  ; }
     
     /**
-     * Can't linearize with BIND present provided if any mentioned vars are not on RHS
+     * Can't linearize with BIND present if any mentioned vars are not on RHS
      */
     @Test public void testClassify_LeftJoin_13()
     { classifyLJ("{ ?s ?p ?x OPTIONAL { ?s1 ?p2 ?x . BIND(CONCAT(?s, ?x) AS ?test) } }",
false)  ; }
     
     /**
-     * Can't linearize with BIND present provided if any mentioned vars are not on RHS
+     * Can't linearize with BIND present if any mentioned vars are not on RHS
      */
     @Test public void testClassify_LeftJoin_14()
     { classifyLJ("{ ?s ?p ?x OPTIONAL { ?s1 ?p2 ?x . BIND(CONCAT(?s1, ?p1, ?p2, ?x) AS ?test)
} }", false)  ; }
-        
     
+    /**
+     * Can't linearize with BIND present if any mentioned vars are not fixed on RHS
+     */
+    @Test public void testClassify_LeftJoin_15()
+    { classifyLJ("{ ?s ?p ?x OPTIONAL { BIND(?x AS ?test) OPTIONAL { ?x ?p1 ?o1 } } }", false)
 ; }
+    
+    /**
+     * Test left join classification
+     * @param pattern WHERE clause for the query as a string
+     * @param expected Whether the join should be classified as linear
+     */
     private void classifyLJ(String pattern, boolean expected)
     {
         String qs1 = "PREFIX : <http://example/>\n" ;

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java?rev=1496169&r1=1496168&r2=1496169&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java
(original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformFilters.java
Mon Jun 24 19:19:16 2013
@@ -197,6 +197,12 @@ public class TestTransformFilters
              "      (bgp (triple ?s1 ?p2 ?o2)))))"
             ) ;
     }
+    
+    @Test public void equality17() {
+        test("(filter ((= ?x <http://constant1>) (= ?x <http://constant2>)) (join
(bgp (?x <http://p1> ?o1)) (bgp (?x <http://p2> ?o2))))",
+             t_equality,
+             (String[])null);
+    }
 
     // Related to JENA-432
     @Test public void optionalEqualitySubQuery_01() {
@@ -585,6 +591,14 @@ public class TestTransformFilters
              t_implicitJoin,
              (String[])null);
     }
+    
+    @Test public void implicitJoin9()
+    {
+        test(
+             "(filter ((= ?x ?y) (= ?x ?z)) (bgp (?x ?p ?o)(?y ?p1 ?z)))",
+             t_implicitJoin,
+             (String[])null);
+    }
         
     @Test public void implicitLeftJoin1()
     {
@@ -705,7 +719,7 @@ public class TestTransformFilters
                 "(leftjoin (bgp (?x ?p ?o)(?x <http://pred> ?y)) (assign ((?y ?x))
(bgp (?x <http://type> ?type)(?x ?p1 ?o1))))");
     }
     
-    @Test public void implicitJoin8()
+    @Test public void implicitLeftJoin8()
     {
         // We don't currently optimize the case where the filter will evaluate to false
         // for all solutions because neither variable in on the RHS
@@ -714,7 +728,7 @@ public class TestTransformFilters
              t_implicitLeftJoin,
              (String[])null);
     }
-    
+        
     @Test public void implicitLeftJoin9()
     {
         // && means both conditions must hold so can optimize out the implicit join
@@ -733,6 +747,60 @@ public class TestTransformFilters
              (String[])null);
     }
     
+    @Test public void implicitLeftJoin11()
+    {
+        // Unsafe to optimize because cannot guarantee that substituted variable is fixed
on RHS
+        test(
+             "(leftjoin (bgp (?x ?p ?o)) (leftjoin (bgp (?y ?p1 ?o1)) (bgp (?x ?p3 ?y)))
(= ?x ?y))",
+             t_implicitLeftJoin,
+             (String[])null);
+        
+        // Swapping the order of equality expressions should still leave it unsafe
+        test(
+                "(leftjoin (bgp (?x ?p ?o)) (leftjoin (bgp (?y ?p1 ?o1)) (bgp (?x ?p3 ?y)))
(= ?y ?x))",
+                t_implicitLeftJoin,
+                (String[])null);
+    }
+    
+    @Test public void implicitLeftJoin12()
+    {
+        // Unlike implicit join overlapping conditions can be safely optimized since the
left join
+        // optimizer is smart enough to apply the optimizations in an appropriate order
+        test(
+             "(leftjoin (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1) (?y ?p2 ?z)) ((= ?x ?y) (= ?x
?z)))",
+             t_implicitLeftJoin,
+             "(leftjoin (bgp (?x ?p ?o)) (assign ((?y ?x) (?z ?x)) (bgp (?x ?p1 ?o1) (?x
?p2 ?x))))");
+        
+        test(
+                "(leftjoin (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1) (?y ?p2 ?z)) ((= ?y ?x) (=
?x ?z)))",
+                t_implicitLeftJoin,
+                "(leftjoin (bgp (?x ?p ?o)) (assign ((?y ?x) (?z ?x)) (bgp (?x ?p1 ?o1) (?x
?p2 ?x))))");
+        
+        test(
+                "(leftjoin (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1) (?y ?p2 ?z)) ((= ?x ?y) (=
?z ?x)))",
+                t_implicitLeftJoin,
+                "(leftjoin (bgp (?x ?p ?o)) (assign ((?y ?x) (?z ?x)) (bgp (?x ?p1 ?o1) (?x
?p2 ?x))))");
+    }
+    
+    @Test public void implicitLeftJoin13()
+    {
+        // There are some overlapping conditions that cannot be safely optimized
+        test(
+                "(leftjoin (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1) (?y ?p2 ?z)) ((= ?x ?y) (=
?x ?z) (= ?y ?z)))",
+                t_implicitLeftJoin,
+                (String[])null);
+
+        test(
+                "(leftjoin (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1) (?y ?p2 ?z)) ((= ?z ?y) (=
?x ?z) (= ?x ?y)))",
+                t_implicitLeftJoin,
+                (String[])null);
+        
+        test(
+                "(leftjoin (bgp (?x ?p ?o)) (bgp (?y ?p1 ?o1) (?y ?p2 ?z)) ((= ?z ?y) (=
?z ?x) (= ?y ?x)))",
+                t_implicitLeftJoin,
+                (String[])null);
+    }
+    
     @Test public void implicitLeftJoinConditional1()
     {
         // Can be optimized because not all assigns block linearization



Mime
View raw message