lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ctarg...@apache.org
Subject [lucene-solr] 02/38: SOLR-15069: [child parentFilter=...] is now optional (#2181)
Date Fri, 15 Jan 2021 21:45:13 GMT
This is an automated email from the ASF dual-hosted git repository.

ctargett pushed a commit to branch jira/solr-13105-toMerge
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit fd1ca3b22bfe9564fb10a650c899b0202187f7dd
Author: David Smiley <dsmiley@apache.org>
AuthorDate: Wed Jan 6 17:43:15 2021 -0500

    SOLR-15069: [child parentFilter=...] is now optional (#2181)
---
 solr/CHANGES.txt                                   |  3 +
 .../response/transform/ChildDocTransformer.java    | 69 ++++++++++++++++++----
 .../transform/ChildDocTransformerFactory.java      | 12 ++--
 .../java/org/apache/solr/schema/IndexSchema.java   |  6 ++
 .../transform/TestChildDocTransformer.java         | 35 +++++++----
 .../src/transforming-result-documents.adoc         | 11 ++--
 6 files changed, 103 insertions(+), 33 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index eb2e73d..adc0820 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -236,6 +236,9 @@ Improvements
 
 * SOLR-15062: /api/cluster/zk/ls should give the stat of the current node (noble)
 
+* SOLR-15069: [child]: the parentFilter parameter is now fully optional and perhaps obsolete.
+  (David Smiley)
+
 Optimizations
 ---------------------
 * SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors.
(Bruno Roustant)
diff --git a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java
b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java
index de00b82..ac89419 100644
--- a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java
+++ b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java
@@ -28,14 +28,22 @@ import java.util.Map;
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.Multimap;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.join.BitSetProducer;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.search.BitsFilteredPostingsEnum;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SolrReturnFields;
@@ -52,20 +60,23 @@ class ChildDocTransformer extends DocTransformer {
   private static final String ANON_CHILD_KEY = "_childDocuments_";
 
   private final String name;
-  private final BitSetProducer parentsFilter;
+  private final BitSetProducer parentsFilter; // if null; resolve parent via uniqueKey instead
   private final DocSet childDocSet;
   private final int limit;
   private final boolean isNestedSchema;
   private final SolrReturnFields childReturnFields;
+  private String[] extraRequestedFields;
 
   ChildDocTransformer(String name, BitSetProducer parentsFilter, DocSet childDocSet,
-                      SolrReturnFields returnFields, boolean isNestedSchema, int limit) {
+                      SolrReturnFields returnFields, boolean isNestedSchema, int limit,
+                      String uniqueKeyField) {
     this.name = name;
     this.parentsFilter = parentsFilter;
     this.childDocSet = childDocSet;
     this.limit = limit;
     this.isNestedSchema = isNestedSchema;
     this.childReturnFields = returnFields!=null? returnFields: new SolrReturnFields();
+    this.extraRequestedFields = parentsFilter == null ? new String[]{uniqueKeyField} : null;
   }
 
   @Override
@@ -77,6 +88,37 @@ class ChildDocTransformer extends DocTransformer {
   public boolean needsSolrIndexSearcher() { return true; }
 
   @Override
+  public String[] getExtraRequestFields() {
+    return extraRequestedFields;
+  }
+
+  private int getPrevRootGivenFilter(LeafReaderContext leafReaderContext, int segRootId)
throws IOException {
+    final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext);
+    if (segParentsBitSet != null) {
+      return segRootId == 0 ? -1 : segParentsBitSet.prevSetBit(segRootId - 1);
+    }
+    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+        "Parent filter '" + parentsFilter + "' doesn't match any parent documents");
+  }
+
+  private int getPrevRootGivenId(LeafReaderContext leafReaderContext, int segRootId,
+                                 BytesRef idBytes) throws IOException {
+    final LeafReader reader = leafReaderContext.reader();
+    final Terms terms = reader.terms(IndexSchema.ROOT_FIELD_NAME); // never returns null
here
+    final TermsEnum iterator = terms.iterator();
+    if (iterator.seekExact(idBytes)) {
+      PostingsEnum docs = iterator.postings(null, PostingsEnum.NONE);
+      docs = BitsFilteredPostingsEnum.wrap(docs, reader.getLiveDocs());
+      int id = docs.nextDoc();
+      if (id != DocIdSetIterator.NO_MORE_DOCS) {
+        assert id <= segRootId : "integrity violation";
+        return id - 1;
+      }
+    }
+    return segRootId - 1; // thus no child docs
+  }
+
+  @Override
   public void transform(SolrDocument rootDoc, int rootDocId) {
     // note: this algorithm works if both if we have have _nest_path_  and also if we don't!
 
@@ -87,18 +129,25 @@ class ChildDocTransformer extends DocTransformer {
       final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
       final int seg = ReaderUtil.subIndex(rootDocId, leaves);
       final LeafReaderContext leafReaderContext = leaves.get(seg);
+      final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
       final int segBaseId = leafReaderContext.docBase;
       final int segRootId = rootDocId - segBaseId;
-      final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext);
-      final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
 
-      if (segParentsBitSet == null) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            "Parent filter '" + parentsFilter + "' doesn't match any parent documents");
+      // can return be -1 and that's okay  (happens for very first block)
+      final int segPrevRootId;
+      if (parentsFilter != null) {
+        segPrevRootId = getPrevRootGivenFilter(leafReaderContext, segRootId);
+      } else {
+        final IndexSchema schema = searcher.getSchema();
+        final String idStr = schema.printableUniqueKey(rootDoc);
+        if (idStr == null) {
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+              "[child] requires fl to include the ID");
+        }
+        final BytesRef idBytes = schema.indexableUniqueKey(idStr);
+        segPrevRootId = getPrevRootGivenId(leafReaderContext, rootDocId, idBytes);
       }
 
-      final int segPrevRootId = segRootId==0? -1: segParentsBitSet.prevSetBit(segRootId -
1); // can return -1 and that's okay
-
       if (segPrevRootId == (segRootId - 1)) {
         // doc has no children, return fast
         return;
diff --git a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java
b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java
index f5e1854..e841a59 100644
--- a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java
+++ b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java
@@ -41,7 +41,9 @@ import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
 /**
  * Attaches all descendants (child documents) to each parent document.
  *
- * The "parentFilter" parameter is mandatory if the schema is not of nest/hierarchy.
+ * Optionally you can provide a "parentFilter" param to designate which documents are the
root
+ * documents (parent-most documents).  Solr can figure this out on its own but you might
want to
+ * specify it.
  *
  * Optionally you can provide a "childFilter" param to filter out which child documents should
be returned and a
  * "limit" param which provides an option to specify the number of child documents
@@ -96,10 +98,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
     // DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery);
     // then return BitSetProducer with custom BitSet impl accessing the docSet
     if (parentFilterStr == null) {
-      if (!buildHierarchy) {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should be sent as parentFilter=filterCondition");
-      }
-      parentsFilter = new QueryBitSetProducer(rootFilter);
+      parentsFilter = !buildHierarchy ? null : new QueryBitSetProducer(rootFilter);
     } else {
       if(buildHierarchy) {
         throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should not be sent
when the schema is nested");
@@ -137,7 +136,8 @@ public class ChildDocTransformerFactory extends TransformerFactory {
 
     int limit = params.getInt( "limit", 10 );
 
-    return new ChildDocTransformer(field, parentsFilter, childDocSet, childSolrReturnFields,
buildHierarchy, limit);
+    return new ChildDocTransformer(field, parentsFilter, childDocSet, childSolrReturnFields,
+        buildHierarchy, limit, req.getSchema().getUniqueKeyField().getName());
   }
 
   private static Query parseQuery(String qstr, SolrQueryRequest req, String param) {
diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
index c2f3579..d41f395 100644
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@@ -47,6 +47,7 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.payloads.PayloadDecoder;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Version;
 import org.apache.solr.common.ConfigNode;
 import org.apache.solr.common.MapSerializable;
@@ -359,6 +360,11 @@ public class IndexSchema {
     }
   }
 
+  /** Given a readable/printable uniqueKey value, return an indexable version */
+  public BytesRef indexableUniqueKey(String idStr) {
+    return new BytesRef(uniqueKeyFieldType.toInternal(idStr));
+  }
+
   private SchemaField getIndexedField(String fname) {
     SchemaField f = getFields().get(fname);
     if (f==null) {
diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java
b/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java
index 1a13a24..48f2816 100644
--- a/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java
+++ b/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java
@@ -96,13 +96,14 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
         "/response/result/doc[1]/doc[2]/str[@name='id']='5'"};
 
     assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1);
+        "fl", "*,[child]"), test1);
 
+    // shows parentFilter specified (not necessary any more) and also child
     assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"),
test2);
+        "fl", "id, subject,[child childFilter=\"title:foo\"]"), test2);
 
     assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\"
limit=2]"), test3);
+        "fl", "id, subject,[child childFilter=\"title:bar\" limit=2]"), test3);
 
     SolrException e = expectThrows(SolrException.class, () -> {
       h.query(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
@@ -240,13 +241,13 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
     };
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1);
+        "fl", "*,[child]"), test1);
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"),
test2);
+        "fl", "id, subject,[child childFilter=\"title:foo\"]"), test2);
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\"
limit=3]"), test3);
+        "fl", "id, subject,[child childFilter=\"title:bar\" limit=3]"), test3);
   }
 
   private void testChildDocNonStoredDVFields() throws Exception {
@@ -271,26 +272,26 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
     };
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1);
+        "fl", "*,[child]"), test1);
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "intDvoDefault, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"),
test2);
+        "fl", "intDvoDefault, subject,[child childFilter=\"title:foo\"]"), test2);
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "intDvoDefault, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\"
limit=2]"), test3);
+        "fl", "intDvoDefault, subject,[child childFilter=\"title:bar\" limit=2]"), test3);
 
   }
 
   private void testChildReturnFields() throws Exception {
 
     assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "*,[child parentFilter=\"subject:parentDocument\" fl=\"intDvoDefault,child_fl:[value
v='child_fl_test']\"]"),
+        "fl", "*,[child fl=\"intDvoDefault,child_fl:[value v='child_fl_test']\"]"),
         "/response/docs/[0]/intDefault==42",
         "/response/docs/[0]/_childDocuments_/[0]/intDvoDefault==42",
         "/response/docs/[0]/_childDocuments_/[0]/child_fl=='child_fl_test'");
 
     try(SolrQueryRequest req = req("q", "*:*", "fq", "subject:\"parentDocument\" ",
-        "fl", "intDefault,[child parentFilter=\"subject:parentDocument\" fl=\"intDvoDefault,
[docid]\"]")) {
+        "fl", "intDefault,[child fl=\"intDvoDefault, [docid]\"]")) {
       BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse();
       Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
       while (docsStreamer.hasNext()) {
@@ -401,6 +402,14 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
                  "fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"),
              tests);
 
+    // shows if parentFilter matches all docs, then there are effectively no child docs
+    assertJQ(req("q", "*:*",
+        "sort", "id asc",
+        "fq", "subject:\"parentDocument\" ",
+        "fl", "id,[child childFilter='cat:childDocument' parentFilter=\"*:*\"]"),
+        "/response==" +
+            "{'numFound':2,'start':0,'numFoundExact':true,'docs':[{'id':'1'},{'id':'4'}]}");
+
   }
   
   private void testSubQueryParentFilterJSON() throws Exception {
@@ -451,13 +460,13 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
     assertQ(req("q", "*:*", 
                 "sort", "id asc",
                 "fq", "subject:\"parentDocument\" ",
-                "fl", "*,[child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"),

+                "fl", "*,[child childFilter='cat:childDocument']"), 
             tests);
 
     assertQ(req("q", "*:*", 
                 "sort", "id asc",
                 "fq", "subject:\"parentDocument\" ",
-                "fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"),
+                "fl", "id, cat, title, [child childFilter='cat:childDocument']"),
             tests);
   }
 
diff --git a/solr/solr-ref-guide/src/transforming-result-documents.adoc b/solr/solr-ref-guide/src/transforming-result-documents.adoc
index 7394097..e1f2c5b 100644
--- a/solr/solr-ref-guide/src/transforming-result-documents.adoc
+++ b/solr/solr-ref-guide/src/transforming-result-documents.adoc
@@ -132,14 +132,11 @@ Note that this transformer can be used even when the query used to match
the res
 
 [source,plain]
 ----
-q=book_title:Solr&fl=id,[child parentFilter=doc_type:book childFilter=doc_type:chapter
limit=100]
+q=book_title:Solr&fl=id,[child childFilter=doc_type:chapter limit=100]
 ----
 
 If the documents involved include a `\_nest_path_` field, then it is used to re-create the
hierarchical structure of the descendent documents using the original psuedo-field names the
documents were indexed with, otherwise the descendent documents are returned as a flat list
of <<indexing-nested-documents#indexing-anonymous-children,anonymous children>>.
 
-`parentFilter`::
-When using a schema that does _not_ include the `\_nest_path_` field, this parameter is mandatory,
and serves the same purpose as the `of`/`which` parms in `{!child}`/`{!parent}` query parsers:
to identify the set of "all parents" for the purpose of identifying the begining & end
of each nested document block.  *When a schema _does_ include a  `\_nest_path_` field, this
parameter is prohibited.*
-
 `childFilter`::
 A query to filter which child documents should be included. This can be particularly useful
when you have multiple levels of hierarchical documents. The default is all children.
 
@@ -151,6 +148,12 @@ The field list which the transformer is to return. The default is the
top level
 +
 There is a further limitation in which the fields here should be a subset of those specified
by the top level `fl` parameter.
 
+`parentFilter`::
+Serves the same purpose as the `of`/`which` params in `{!child}`/`{!parent}` query parsers:
to
+identify the set of "all parents" for the purpose of identifying the beginning & end
of each
+nested document block.  This recently became fully optional and appears to be obsolete.
+It is likely to be removed in a future Solr release, so _if you find it has some use, let
the
+project know!_
 
 [TIP]
 ====


Mime
View raw message