Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 669BA185AF for ; Tue, 2 Feb 2016 09:53:27 +0000 (UTC) Received: (qmail 52374 invoked by uid 500); 2 Feb 2016 09:53:14 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 52082 invoked by uid 99); 2 Feb 2016 09:53:14 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 02 Feb 2016 09:53:14 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id C30AFE00DC; Tue, 2 Feb 2016 09:53:13 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: cpoerschke@apache.org To: commits@lucene.apache.org Date: Tue, 02 Feb 2016 09:53:16 -0000 Message-Id: <2cdc19d8be26410da10dfdeb21e23190@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [04/21] lucene-solr git commit: SOLR-8532: GraphQuery don't collect edges at maxDepth level SOLR-8532: GraphQuery don't collect edges at maxDepth level Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e6db8ba2 Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e6db8ba2 Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e6db8ba2 Branch: refs/heads/master-solr-8621 Commit: e6db8ba2149e9733b7ca4d19a90ff9a36c75df1e Parents: c403083 Author: yonik Authored: Fri Jan 29 10:59:49 2016 -0500 Committer: yonik Committed: Fri Jan 29 10:59:49 2016 -0500 ---------------------------------------------------------------------- solr/CHANGES.txt | 3 + .../org/apache/solr/search/join/GraphQuery.java | 89 +++++++++++--------- .../solr/search/join/GraphTermsCollector.java | 2 +- .../apache/solr/search/join/GraphQueryTest.java | 23 +++++ 4 files changed, 78 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/CHANGES.txt ---------------------------------------------------------------------- diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5ff042f..f5c88a3 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -172,6 +172,9 @@ Optimizations count. Also includes change to move to the next non-zero term value when selecting a segment position. (Keith Laban, Steve Bower, Dennis Gove) +* SOLR-8532: Optimize GraphQuery when maxDepth is set by not collecting edges at the maxDepth level. + (Kevin Watters via yonik) + Other Changes ---------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java ---------------------------------------------------------------------- diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java index 5f9bfd2..a31568a 100644 --- a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java +++ b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java @@ -135,8 +135,8 @@ public class GraphQuery extends Query { SolrIndexSearcher fromSearcher; private float queryNorm = 1.0F; private float queryWeight = 1.0F; - int frontierSize = 0; - public int currentDepth = 0; + private int frontierSize = 0; + private int currentDepth = -1; private Filter filter; private DocSet resultSet; @@ -177,69 +177,82 @@ public class GraphQuery extends Query { * @throws IOException - if a sub search fails... maybe other cases too! :) */ private DocSet getDocSet() throws IOException { - DocSet fromSet = null; - FixedBitSet seedResultBits = null; // Size that the bit set needs to be. int capacity = fromSearcher.getRawReader().maxDoc(); // The bit set to contain the results that match the query. FixedBitSet resultBits = new FixedBitSet(capacity); - // The measure of how deep in the graph we have gone. - currentDepth = 0; + // this holds the result at each level + BitDocSet fromSet = null; + // the root docs if we return root is false + FixedBitSet rootBits = null; // the initial query for the frontier for the first query Query frontierQuery = q; // Find all documents in this graph that are leaf nodes to speed traversal - // TODO: speed this up in the future with HAS_FIELD type queries - BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder(); - WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*")); - leafNodeQuery.add(edgeQuery, Occur.MUST_NOT); - DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build()); + DocSet leafNodes = resolveLeafNodes(toField); // Start the breadth first graph traversal. + do { - // Create the graph result collector for this level - GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes); - // traverse the level! - fromSearcher.search(frontierQuery, graphResultCollector); - // All edge ids on the frontier. - BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms(); - frontierSize = collectorTerms.size(); - // The resulting doc set from the frontier. - fromSet = graphResultCollector.getDocSet(); - if (seedResultBits == null) { - // grab a copy of the seed bits (these are the "rootNodes") - seedResultBits = ((BitDocSet)fromSet).getBits().clone(); + // Increment how far we have gone in the frontier. + currentDepth++; + // if we are at the max level we don't need the graph terms collector. + // TODO validate that the join case works properly. + if (maxDepth != -1 && currentDepth >= maxDepth) { + // if we've reached the max depth, don't worry about collecting edges. + fromSet = fromSearcher.getDocSetBits(frontierQuery); + // explicitly the frontier size is zero now so we can break + frontierSize = 0; + } else { + // when we're not at the max depth level, we need to collect edges + // Create the graph result collector for this level + GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes); + fromSearcher.search(frontierQuery, graphResultCollector); + fromSet = graphResultCollector.getDocSet(); + // All edge ids on the frontier. + BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms(); + frontierSize = collectorTerms.size(); + // The resulting doc set from the frontier. + FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize); + if (fq == null) { + // in case we get null back, make sure we know we're done at this level. + frontierSize = 0; + } else { + frontierQuery = fq.getQuery(); + frontierSize = fq.getFrontierSize(); + } } - Integer fs = new Integer(frontierSize); - FrontierQuery fq = buildFrontierQuery(collectorTerms, fs); - if (fq == null) { - // in case we get null back, make sure we know we're done at this level. - fq = new FrontierQuery(null, 0); + if (currentDepth == 0 && !returnRoot) { + // grab a copy of the root bits but only if we need it. + rootBits = fromSet.getBits(); } - frontierQuery = fq.getQuery(); - frontierSize = fq.getFrontierSize(); // Add the bits from this level to the result set. - resultBits.or(((BitDocSet)fromSet).getBits()); - // Increment how far we have gone in the frontier. - currentDepth++; - // Break out if we have reached our max depth - if (currentDepth >= maxDepth && maxDepth != -1) { + resultBits.or(fromSet.getBits()); + // test if we discovered any new edges, if not , we're done. + if ((maxDepth != -1 && currentDepth >= maxDepth)) { break; } - // test if we discovered any new edges, if not , we're done. } while (frontierSize > 0); // helper bit set operations on the final result set if (!returnRoot) { - resultBits.andNot(seedResultBits); + resultBits.andNot(rootBits); } + // this is the final resulting filter. BitDocSet resultSet = new BitDocSet(resultBits); // If we only want to return leaf nodes do that here. if (onlyLeafNodes) { return resultSet.intersection(leafNodes); } else { - // create a doc set off the bits that we found. return resultSet; } } + private DocSet resolveLeafNodes(String field) throws IOException { + BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder(); + WildcardQuery edgeQuery = new WildcardQuery(new Term(field, "*")); + leafNodeQuery.add(edgeQuery, Occur.MUST_NOT); + DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build()); + return leafNodes; + } + /** Build an automaton to represent the frontier query */ private Automaton buildAutomaton(BytesRefHash termBytesHash) { // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?) http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java ---------------------------------------------------------------------- diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java index 6af3694..389721e 100644 --- a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java +++ b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java @@ -108,7 +108,7 @@ class GraphTermsCollector extends SimpleCollector implements Collector { numHits++; } - public DocSet getDocSet() { + public BitDocSet getDocSet() { if (bits == null) { // TODO: this shouldn't happen bits = new FixedBitSet(maxDoc); http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java ---------------------------------------------------------------------- diff --git a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java index 4385dcc..1f5de65 100644 --- a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java +++ b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java @@ -77,6 +77,29 @@ public class GraphQueryTest extends SolrTestCaseJ4 { qr = createRequest(g4Query); assertQ(qr,"//*[@numFound='2']"); + String g5Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=0}id:doc_8"; + qr = createRequest(g5Query); + assertQ(qr,"//*[@numFound='1']"); + + String g6Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8"; + qr = createRequest(g6Query); + assertQ(qr,"//*[@numFound='3']"); + + String g7Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8"; + qr = createRequest(g7Query); + assertQ(qr,"//*[@numFound='2']"); + + String g8Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"true\" maxDepth=2}id:doc_8"; + qr = createRequest(g8Query); + assertQ(qr,"//*[@numFound='1']"); + + String g9Query = "{!graph from=\"node_id\" to=\"edge_id\" maxDepth=1}id:doc_1"; + qr = createRequest(g9Query); + assertQ(qr,"//*[@numFound='2']"); + + String g10Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=false maxDepth=1}id:doc_1"; + qr = createRequest(g10Query); + assertQ(qr,"//*[@numFound='1']"); } private SolrQueryRequest createRequest(String query) {