jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thom...@apache.org
Subject svn commit: r1436361 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/index/p2/ main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/ main/java/org/apache/jackrabbit/oak/query/ test/java/org/apache/jackrabbi...
Date Mon, 21 Jan 2013 14:16:21 GMT
Author: thomasm
Date: Mon Jan 21 14:16:20 2013
New Revision: 1436361

URL: http://svn.apache.org/viewvc?rev=1436361&view=rev
Log:
OAK-572 Query: improved cost estimation for the P2 and NodeType index

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexLookup.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/ContentMirrorStoreStrategy.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/IndexStoreStrategy.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineImpl.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexLookup.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexLookup.java?rev=1436361&r1=1436360&r2=1436361&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexLookup.java
(original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexLookup.java
Mon Jan 21 14:16:20 2013
@@ -106,7 +106,7 @@ public class Property2IndexLookup {
         if (state == null) {
             return Double.POSITIVE_INFINITY;
         }
-        Iterable<String> it = value == null ? null : Property2Index.encode(value);
+        List<String> it = value == null ? null : Property2Index.encode(value);
         return store.count(state, it, MAX_COST);
     }
 

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/ContentMirrorStoreStrategy.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/ContentMirrorStoreStrategy.java?rev=1436361&r1=1436360&r2=1436361&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/ContentMirrorStoreStrategy.java
(original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/ContentMirrorStoreStrategy.java
Mon Jan 21 14:16:20 2013
@@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugin
 import java.util.Collections;
 import java.util.Deque;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
@@ -125,29 +126,15 @@ public class ContentMirrorStoreStrategy 
             }
             indexEntry.setProperty("match", true);
         }
-        long matchCount = countMatchingLeaves(child.getNodeState(), 0, 2);
+        CountingNodeVisitor v = new CountingNodeVisitor(2);
+        v.visit(child.getNodeState());
+        int matchCount = v.getCount();
         if (matchCount == 0) {
             index.removeNode(key);
         } else if (unique && matchCount > 1) {
             throw new CommitFailedException("Uniqueness constraint violated");
         }
     }
-
-    static int countMatchingLeaves(NodeState state, int initialCount, int max) {
-        int count = initialCount;
-        if (state.getProperty("match") != null) {
-            count++;
-        }
-        if (count < max) {
-            for (ChildNodeEntry entry : state.getChildNodeEntries()) {
-                if (count >= max) {
-                    break;
-                }
-                count = countMatchingLeaves(entry.getNodeState(), count, max);
-            }
-        }
-        return count;
-    }
     
     @Override
     public Iterable<String> query(final Filter filter, final String indexName, 
@@ -175,19 +162,31 @@ public class ContentMirrorStoreStrategy 
     }
     
     @Override
-    public int count(NodeState index, Iterable<String> values, int max) {
+    public int count(NodeState index, List<String> values, int max) {
         int count = 0;
         if (values == null) {
-            count = countMatchingLeaves(index, count, max);
+            CountingNodeVisitor v = new CountingNodeVisitor(max);
+            v.visit(index);
+            count = v.getEstimatedCount();
         } else {
+            int size = values.size();
+            if (size == 0) {
+                return 0;
+            }
+            max = Math.max(10, max / size);
+            int i = 0;
             for (String p : values) {
-                if (count > max) {
+                if (count > max && i > 3) {
+                    count = count / size / i;
                     break;
                 }
                 NodeState s = index.getChildNode(p);
                 if (s != null) {
-                    count = countMatchingLeaves(s, count, max);
+                    CountingNodeVisitor v = new CountingNodeVisitor(max);
+                    v.visit(s);
+                    count += v.getEstimatedCount();
                 }
+                i++;
             }
         }
         return count;
@@ -311,5 +310,119 @@ public class ContentMirrorStoreStrategy 
         }
         
     }
+    
+    /**
+     * A node visitor to recursively traverse a number of nodes.
+     */
+    interface NodeVisitor {
+        void visit(NodeState state);
+    }
+    
+    /**
+     * A node visitor that counts the number of matching nodes up to a given
+     * maximum, in order to estimate the number of matches.
+     */
+    static class CountingNodeVisitor implements NodeVisitor {
+        
+        /**
+         * The maximum number of matching nodes to count.
+         */
+        final int maxCount;
+        
+        /**
+         * The current count of matching nodes.
+         */
+        int count;
+        
+        /**
+         * The current depth (number of parent nodes).
+         */
+        int depth;
+        
+        /**
+         * The total number of child nodes per node, for those nodes that were
+         * fully traversed and do have child nodes. This value is used to
+         * calculate the average width.
+         */
+        long widthTotal;
+        
+        /**
+         * The number of nodes that were fully traversed and do have child
+         * nodes. This value is used to calculate the average width.
+         */
+        int widthCount;
+        
+        /**
+         * The sum of the depth of all matching nodes. This value is used to
+         * calculate the average depth.
+         */
+        long depthTotal;
+        
+        CountingNodeVisitor(int maxCount) {
+            this.maxCount = maxCount;
+        }
+
+        @Override
+        public void visit(NodeState state) {
+            if (state.getProperty("match") != null) {
+                count++;
+                depthTotal += depth;
+            }
+            if (count < maxCount) {
+                depth++;
+                int width = 0;
+                boolean finished = true;
+                for (ChildNodeEntry entry : state.getChildNodeEntries()) {
+                    if (count >= maxCount) {
+                        finished = false;
+                        break;
+                    }
+                    width++;
+                    visit(entry.getNodeState());
+                }
+                if (finished && width > 0) {
+                    widthTotal += width;
+                    widthCount++;
+                }
+                depth--;
+            }
+        }
+        
+        /**
+         * The number of matches (at most the maximum count).
+         * 
+         * @return the match count
+         */
+        int getCount() {
+            return count;
+        }
+        
+        /**
+         * The number of estimated matches. This value might be higher than the
+         * number of counted matches, if the maximum number of matches has been
+         * reached. It is based on the average depth of matches, and the average
+         * number of child nodes.
+         * 
+         * @return the estimated matches
+         */
+        int getEstimatedCount() {
+            if (count < maxCount) {
+                return count;
+            }
+            double averageDepth = (int) (depthTotal / count);
+            double averageWidth = 2;
+            if (widthCount > 0) {
+                averageWidth = (int) (widthTotal / widthCount);
+            }
+            // calculate with an average width of at least 2
+            averageWidth = Math.max(2, averageWidth);
+            // the number of estimated matches is calculated as the
+            // of a estimated
+            long estimatedNodes = (long) Math.pow(averageWidth, 2 * averageDepth);
+            estimatedNodes = Math.min(estimatedNodes, Integer.MAX_VALUE);
+            return Math.max(count, (int) estimatedNodes);
+        }
+        
+    }
 
 }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/IndexStoreStrategy.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/IndexStoreStrategy.java?rev=1436361&r1=1436360&r2=1436361&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/IndexStoreStrategy.java
(original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/p2/strategy/IndexStoreStrategy.java
Mon Jan 21 14:16:20 2013
@@ -16,6 +16,8 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.p2.strategy;
 
+import java.util.List;
+
 import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
@@ -71,6 +73,6 @@ public interface IndexStoreStrategy {
      * @param max the maximum value to return
      * @return the aggregated count of occurrences for each provided value
      */
-    int count(NodeState index, Iterable<String> values, int max);
+    int count(NodeState index, List<String> values, int max);
 
 }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineImpl.java?rev=1436361&r1=1436360&r2=1436361&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineImpl.java
(original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineImpl.java
Mon Jan 21 14:16:20 2013
@@ -162,6 +162,9 @@ public abstract class QueryEngineImpl im
 
     public QueryIndex getBestIndex(Query query, NodeState rootState, Filter filter) {
         QueryIndex best = null;
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("cost using filter " + filter);
+        }
         double bestCost = Double.POSITIVE_INFINITY;
         for (QueryIndex index : getIndexes(rootState)) {
             double cost = index.getCost(filter, rootState);

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexTest.java?rev=1436361&r1=1436360&r2=1436361&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexTest.java
(original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/p2/Property2IndexTest.java
Mon Jan 21 14:16:20 2013
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.p2;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.util.Arrays;
@@ -76,12 +77,11 @@ public class Property2IndexTest {
         assertEquals(MANY, find(lookup, "foo", "xyz").size());
         assertEquals(MANY + 2, find(lookup, "foo", null).size());
 
-        assertEquals(Math.min(100, MANY), 
-                (int) lookup.getCost("foo", PropertyValues.newString("xyz")));
-        assertEquals(Math.min(100, MANY), 
-                (int) lookup.getCost("foo", null));
-
-        
+        double cost;
+        cost = lookup.getCost("foo", PropertyValues.newString("xyz"));
+        assertTrue("cost: " + cost, cost >= MANY);
+        cost = lookup.getCost("foo", null);
+        assertTrue("cost: " + cost, cost >= MANY);
     }
     
     private static Set<String> find(Property2IndexLookup lookup, String name, String
value) {



Mime
View raw message