From oak-commits-return-22928-archive-asf-public=cust-asf.ponee.io@jackrabbit.apache.org Tue Aug 3 13:53:42 2021 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mxout1-ec2-va.apache.org (mxout1-ec2-va.apache.org [3.227.148.255]) by mx-eu-01.ponee.io (Postfix) with ESMTPS id ACFF7180654 for ; Tue, 3 Aug 2021 15:53:42 +0200 (CEST) Received: from mail.apache.org (mailroute1-lw-us.apache.org [207.244.88.153]) by mxout1-ec2-va.apache.org (ASF Mail Server at mxout1-ec2-va.apache.org) with SMTP id EDF433EC16 for ; Tue, 3 Aug 2021 13:53:41 +0000 (UTC) Received: (qmail 227 invoked by uid 500); 3 Aug 2021 13:53:41 -0000 Mailing-List: contact oak-commits-help@jackrabbit.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: oak-dev@jackrabbit.apache.org Delivered-To: mailing list oak-commits@jackrabbit.apache.org Received: (qmail 206 invoked by uid 99); 3 Aug 2021 13:53:41 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Aug 2021 13:53:41 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 7850C81F24; Tue, 3 Aug 2021 13:53:41 +0000 (UTC) Date: Tue, 03 Aug 2021 13:53:42 +0000 To: "oak-commits@jackrabbit.apache.org" Subject: [jackrabbit-oak] 01/01: OAK-9522 Index cost estimation: prefer union query with path restriction MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: thomasm@apache.org In-Reply-To: <162799882134.23857.2730778387535907769@gitbox.apache.org> References: <162799882134.23857.2730778387535907769@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: jackrabbit-oak X-Git-Refname: refs/heads/issues/OAK-9522 X-Git-Reftype: branch X-Git-Rev: 1c0900ff881a83236b9ad915c0649742137c2db0 X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20210803135341.7850C81F24@gitbox.apache.org> This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch issues/OAK-9522 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git commit 1c0900ff881a83236b9ad915c0649742137c2db0 Author: thomasm AuthorDate: Tue Aug 3 15:53:31 2021 +0200 OAK-9522 Index cost estimation: prefer union query with path restriction --- .../lucene/LuceneIndexPathRestrictionTest.java | 55 ++++++++++++++++++++++ .../search/spi/query/FulltextIndexPlanner.java | 33 ++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java index b555991..fbd8e9e 100644 --- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java +++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java @@ -33,6 +33,7 @@ import org.apache.jackrabbit.oak.query.index.FilterImpl; import org.apache.jackrabbit.oak.spi.commit.EditorHook; import org.apache.jackrabbit.oak.spi.query.Cursor; import org.apache.jackrabbit.oak.spi.query.Filter; +import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction; import org.apache.jackrabbit.oak.spi.query.QueryIndex; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; @@ -105,6 +106,53 @@ public class LuceneIndexPathRestrictionTest { } @Test + public void entryCountWithNoPathRestriction() throws Exception { + IndexDefinitionBuilder idxBuilder = + new LuceneIndexDefinitionBuilder(rootBuilder.child(IndexConstants.INDEX_DEFINITIONS_NAME). + child("fooIndex")) + .noAsync().evaluatePathRestrictions(); + idxBuilder.indexRule("nt:base").property("foo").propertyIndex(); + idxBuilder.build(); + commit(); + + NodeBuilder testRootBuilder = rootBuilder.child("test"); + for(int i=0; i<100; i++) { + testRootBuilder.child("n" + i).setProperty("foo", "bar"); + } + commit(); + + FilterImpl f; + + // //*[foo = 'bar'] + f = createFilter(root, NT_BASE); + f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar")); + // equality check: we assume 5 different values + assertEquals(20, getEstimatedCount(f)); + + // /jcr:root/test/*[foo = 'bar'] + f = createFilter(root, NT_BASE); + f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar")); + f.restrictPath("/test", PathRestriction.DIRECT_CHILDREN); + // direct children + equality check: we assume 5 different values, and 50% + assertEquals(10, getEstimatedCount(f)); + + // /jcr:root/test//*[foo = 'bar'] + f = createFilter(root, NT_BASE); + f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar")); + f.restrictPath("/test", PathRestriction.ALL_CHILDREN); + // descendants + equality check: we assume 5 different values, and 90% + assertEquals(18, getEstimatedCount(f)); + + // /jcr:root/test/x[foo = 'bar'] + f = createFilter(root, NT_BASE); + f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar")); + f.restrictPath("/test/x", PathRestriction.EXACT); + // exact path + equality check: we assume 5 different values, and 90% + assertEquals(1, getEstimatedCount(f)); + + } + + @Test public void pathTranformationWithAllChildrenPathRestriction() throws Exception { IndexDefinitionBuilder idxBuilder = new LuceneIndexDefinitionBuilder(rootBuilder.child(IndexConstants.INDEX_DEFINITIONS_NAME).child("fooIndex")) @@ -265,6 +313,13 @@ public class LuceneIndexPathRestrictionTest { assertEquals(f.toString(), expected, paths); } + private long getEstimatedCount(Filter f) { + List plans = index.getPlans(f, null, root); + assertEquals("Only one plan must show up", 1, plans.size()); + QueryIndex.IndexPlan plan = plans.get(0); + return plan.getEstimatedEntryCount(); + } + private void commit() throws Exception { root = HOOK.processCommit(rootBuilder.getBaseState(), rootBuilder.getNodeState(), EMPTY); rootBuilder = root.builder(); diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java index 53d5aa6..52288b8 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java @@ -47,6 +47,7 @@ import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics; import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition; import org.apache.jackrabbit.oak.spi.filter.PathFilter; import org.apache.jackrabbit.oak.spi.query.Filter; +import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction; import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction; import org.apache.jackrabbit.oak.spi.query.QueryConstants; import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains; @@ -451,7 +452,7 @@ public class FulltextIndexPlanner { canHandleNativeFunction = false; } } - + if (!canHandleNativeFunction) { return null; } @@ -873,6 +874,36 @@ public class FulltextIndexPlanner { minNumDocs = docCntForField; } } + + // Reduce the estimation if the index supports path restrictions, + // and we have one that we can use. + // We don't need to be exact here, because we don't know the + // exact number of nodes in the subtree - this is taken care of + // in the query engine. + // But we need to adjust the cost a bit, so that + // the cost is lower if there is a usable path condition + // (see below). + if (definition.evaluatePathRestrictions()) { + PathRestriction pathRestriction = filter.getPathRestriction(); + if (pathRestriction == PathRestriction.EXACT || pathRestriction == PathRestriction.PARENT) { + // We have an exact path restriction and we have an index that indexes the path: + // then the result size is at most 1. + minNumDocs = 1; + } else if (pathRestriction == PathRestriction.DIRECT_CHILDREN) { + // We restrict to direct children: assume at most 50% are there. + minNumDocs /= 2; + } else if (pathRestriction != PathRestriction.NO_RESTRICTION) { + // Other restriction: assume at most 90%. + // This is important if we have + // "descendantnode(/content/abc) or issamenode(/content/abc)": + // We could either convert it to a union, or not use the path restriction. + // In this case, we want to convert it to a union! + // A path restriction will reduce the number of entries. + // So the cost of having a usable path condition is + // lower than the cost of not having a path condition at all. + minNumDocs = (int) (minNumDocs * 0.9); + } + } return minNumDocs; }