jackrabbit-oak-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thom...@apache.org
Subject [jackrabbit-oak] 01/01: OAK-9522 Index cost estimation: prefer union query with path restriction
Date Tue, 03 Aug 2021 13:53:42 GMT
This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch issues/OAK-9522
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 1c0900ff881a83236b9ad915c0649742137c2db0
Author: thomasm <thomasm@apache.org>
AuthorDate: Tue Aug 3 15:53:31 2021 +0200

    OAK-9522 Index cost estimation: prefer union query with path restriction
---
 .../lucene/LuceneIndexPathRestrictionTest.java     | 55 ++++++++++++++++++++++
 .../search/spi/query/FulltextIndexPlanner.java     | 33 ++++++++++++-
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java
index b555991..fbd8e9e 100644
--- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java
@@ -33,6 +33,7 @@ import org.apache.jackrabbit.oak.query.index.FilterImpl;
 import org.apache.jackrabbit.oak.spi.commit.EditorHook;
 import org.apache.jackrabbit.oak.spi.query.Cursor;
 import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
@@ -105,6 +106,53 @@ public class LuceneIndexPathRestrictionTest {
     }
 
     @Test
+    public void entryCountWithNoPathRestriction() throws Exception {
+        IndexDefinitionBuilder idxBuilder =
+                new LuceneIndexDefinitionBuilder(rootBuilder.child(IndexConstants.INDEX_DEFINITIONS_NAME).
+                        child("fooIndex"))
+                        .noAsync().evaluatePathRestrictions();
+        idxBuilder.indexRule("nt:base").property("foo").propertyIndex();
+        idxBuilder.build();
+        commit();
+
+        NodeBuilder testRootBuilder = rootBuilder.child("test");
+        for(int i=0; i<100; i++) {
+            testRootBuilder.child("n" + i).setProperty("foo", "bar");
+        }
+        commit();
+
+        FilterImpl f;
+
+        // //*[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        // equality check: we assume 5 different values
+        assertEquals(20, getEstimatedCount(f));
+
+        // /jcr:root/test/*[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        f.restrictPath("/test", PathRestriction.DIRECT_CHILDREN);
+        // direct children + equality check: we assume 5 different values, and 50%
+        assertEquals(10, getEstimatedCount(f));
+
+        // /jcr:root/test//*[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        f.restrictPath("/test", PathRestriction.ALL_CHILDREN);
+        // descendants + equality check: we assume 5 different values, and 90%
+        assertEquals(18, getEstimatedCount(f));
+
+        // /jcr:root/test/x[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        f.restrictPath("/test/x", PathRestriction.EXACT);
+        // exact path + equality check: we assume 5 different values, and 90%
+        assertEquals(1, getEstimatedCount(f));
+
+    }
+
+    @Test
     public void pathTranformationWithAllChildrenPathRestriction() throws Exception {
         IndexDefinitionBuilder idxBuilder =
                 new LuceneIndexDefinitionBuilder(rootBuilder.child(IndexConstants.INDEX_DEFINITIONS_NAME).child("fooIndex"))
@@ -265,6 +313,13 @@ public class LuceneIndexPathRestrictionTest {
         assertEquals(f.toString(), expected, paths);
     }
 
+    private long getEstimatedCount(Filter f) {
+        List<QueryIndex.IndexPlan> plans = index.getPlans(f, null, root);
+        assertEquals("Only one plan must show up", 1, plans.size());
+        QueryIndex.IndexPlan plan = plans.get(0);
+        return plan.getEstimatedEntryCount();
+    }
+
     private void commit() throws Exception {
         root = HOOK.processCommit(rootBuilder.getBaseState(), rootBuilder.getNodeState(),
EMPTY);
         rootBuilder = root.builder();
diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
index 53d5aa6..52288b8 100644
--- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
+++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
@@ -47,6 +47,7 @@ import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics;
 import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
 import org.apache.jackrabbit.oak.spi.filter.PathFilter;
 import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
 import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;
 import org.apache.jackrabbit.oak.spi.query.QueryConstants;
 import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
@@ -451,7 +452,7 @@ public class FulltextIndexPlanner {
                 canHandleNativeFunction = false;
             }
         }
-        
+
         if (!canHandleNativeFunction) {
             return null;
         }
@@ -873,6 +874,36 @@ public class FulltextIndexPlanner {
                 minNumDocs = docCntForField;
             }
         }
+
+        // Reduce the estimation if the index supports path restrictions,
+        // and we have one that we can use.
+        // We don't need to be exact here, because we don't know the
+        // exact number of nodes in the subtree - this is taken care of
+        // in the query engine.
+        // But we need to adjust the cost a bit, so that
+        // the cost is lower if there is a usable path condition
+        // (see below).
+        if (definition.evaluatePathRestrictions()) {
+            PathRestriction pathRestriction = filter.getPathRestriction();
+            if (pathRestriction == PathRestriction.EXACT || pathRestriction == PathRestriction.PARENT)
{
+                // We have an exact path restriction and we have an index that indexes the
path:
+                // then the result size is at most 1.
+                minNumDocs = 1;
+            } else if (pathRestriction == PathRestriction.DIRECT_CHILDREN) {
+                // We restrict to direct children: assume at most 50% are there.
+                minNumDocs /= 2;
+            } else if (pathRestriction != PathRestriction.NO_RESTRICTION) {
+                // Other restriction: assume at most 90%.
+                // This is important if we have
+                // "descendantnode(/content/abc) or issamenode(/content/abc)":
+                // We could either convert it to a union, or not use the path restriction.
+                // In this case, we want to convert it to a union!
+                // A path restriction will reduce the number of entries.
+                // So the cost of having a usable path condition is
+                // lower than the cost of not having a path condition at all.
+                minNumDocs = (int) (minNumDocs * 0.9);
+            }
+        }
         return minNumDocs;
     }
 

Mime
View raw message