lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject lucene-solr:branch_7x: SOLR-12441: New NestedUpdateProcessorFactory
Date Wed, 11 Jul 2018 03:02:29 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 937ae2c7b -> f3e8180bd


SOLR-12441: New NestedUpdateProcessorFactory

Closes #410
(cherry picked from commit fe180bb)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f3e8180b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f3e8180b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f3e8180b

Branch: refs/heads/branch_7x
Commit: f3e8180bd71cdc3e51a72e9e529a8e69251bdda5
Parents: 937ae2c
Author: David Smiley <dsmiley@apache.org>
Authored: Tue Jul 10 22:59:41 2018 -0400
Committer: David Smiley <dsmiley@apache.org>
Committed: Tue Jul 10 23:02:15 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../org/apache/solr/schema/IndexSchema.java     |   2 +
 .../processor/NestedUpdateProcessorFactory.java | 137 +++++++++++++
 .../solr/collection1/conf/schema15.xml          |   3 +
 .../conf/solrconfig-update-processor-chains.xml |   5 +
 .../solr/update/TestNestedUpdateProcessor.java  | 195 +++++++++++++++++++
 6 files changed, 345 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f3e8180b/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d6c8f04..11f9bbe 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -75,6 +75,9 @@ New Features
 
 * SOLR-12495: An #EQUALS function for replica in autoscaling policy to equally distribute replicas (noble)
 
+* SOLR-12441: New NestedUpdateProcessorFactory (URP) to populate special fields _NEST_PARENT_ and _NEST_PATH_ of nested
+  (child) documents.  It will generate a uniqueKey of nested docs if they were blank too. (Moshe Bla, David Smiley)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f3e8180b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
index e262c84..f57dc05 100644
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@@ -108,6 +108,8 @@ public class IndexSchema {
   public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
   public static final String MAX_CHARS = "maxChars";
   public static final String NAME = "name";
+  public static final String NEST_PARENT_FIELD_NAME = "_NEST_PARENT_";
+  public static final String NEST_PATH_FIELD_NAME = "_NEST_PATH_";
   public static final String REQUIRED = "required";
   public static final String SCHEMA = "schema";
   public static final String SIMILARITY = "similarity";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f3e8180b/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java
new file mode 100644
index 0000000..aa459bd
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.update.AddUpdateCommand;
+
+/**
+ * Adds fields to nested documents to support some nested search requirements.
+ * It can even generate uniqueKey fields for nested docs.
+ *
+ * @see IndexSchema#NEST_PARENT_FIELD_NAME
+ * @see IndexSchema#NEST_PATH_FIELD_NAME
+ *
+ * @since 7.5.0
+ */
+public class NestedUpdateProcessorFactory extends UpdateRequestProcessorFactory {
+
+  /**
+   * Creates the processor for one update request.
+   *
+   * @return {@code next} unchanged when the request's schema declares neither
+   *         {@link IndexSchema#NEST_PARENT_FIELD_NAME} nor {@link IndexSchema#NEST_PATH_FIELD_NAME};
+   *         otherwise a processor that populates whichever of those fields is declared
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    final IndexSchema schema = req.getSchema();
+    final boolean storeParent = shouldStoreDocParent(schema);
+    final boolean storePath = shouldStoreDocPath(schema);
+    if (!(storeParent || storePath)) {
+      return next;
+    }
+    // pass on the flags computed above rather than querying the schema a second time
+    return new NestedUpdateProcessor(req, storeParent, storePath, next);
+  }
+
+  /** True when the schema's explicit fields include {@link IndexSchema#NEST_PARENT_FIELD_NAME}. */
+  private static boolean shouldStoreDocParent(IndexSchema schema) {
+    return schema.getFields().containsKey(IndexSchema.NEST_PARENT_FIELD_NAME);
+  }
+
+  /** True when the schema's explicit fields include {@link IndexSchema#NEST_PATH_FIELD_NAME}. */
+  private static boolean shouldStoreDocPath(IndexSchema schema) {
+    return schema.getFields().containsKey(IndexSchema.NEST_PATH_FIELD_NAME);
+  }
+
+  /**
+   * Walks every child document of an added document (recursively) and sets the nest path and/or
+   * nest parent fields on it; also assigns a uniqueKey to children that do not carry one.
+   */
+  private static class NestedUpdateProcessor extends UpdateRequestProcessor {
+    private static final String PATH_SEP_CHAR = "/";
+    private static final String NUM_SEP_CHAR = "#";
+    private static final String SINGULAR_VALUE_CHAR = "";
+    private final boolean storePath;
+    private final boolean storeParent;
+    private final String uniqueKeyFieldName;
+
+
+    NestedUpdateProcessor(SolrQueryRequest req, boolean storeParent, boolean storePath, UpdateRequestProcessor next) {
+      super(next);
+      this.storeParent = storeParent;
+      this.storePath = storePath;
+      this.uniqueKeyFieldName = req.getSchema().getUniqueKeyField().getName();
+    }
+
+    @Override
+    public void processAdd(AddUpdateCommand cmd) throws IOException {
+      SolrInputDocument doc = cmd.getSolrInputDocument();
+      processDocChildren(doc, null);
+      super.processAdd(cmd);
+    }
+
+    /**
+     * Visits every field value of {@code doc} that is itself a document and processes it as a child.
+     *
+     * @param doc      the document whose fields are scanned for child documents
+     * @param fullPath nest path of {@code doc}, or {@code null} when {@code doc} is the root
+     * @throws SolrException (BAD_REQUEST) if a field holding child documents has a name containing
+     *         the path separator, or if a child needs a generated uniqueKey but {@code doc} has none
+     */
+    private void processDocChildren(SolrInputDocument doc, String fullPath) {
+      for (SolrInputField field : doc.values()) {
+        int childNum = 0;
+        boolean isSingleVal = !(field.getValue() instanceof Collection);
+        for (Object val : field) {
+          if (!(val instanceof SolrInputDocument)) {
+            // either all collection items are child docs or none are.
+            break;
+          }
+          final String fieldName = field.getName();
+
+          if (fieldName.contains(PATH_SEP_CHAR)) {
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field name: '" + fieldName
+                + "' contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP");
+          }
+          // a single-valued child carries no index: "child#"; list members are numbered: "children#0"
+          final String sChildNum = isSingleVal ? SINGULAR_VALUE_CHAR : String.valueOf(childNum);
+          final SolrInputDocument cDoc = (SolrInputDocument) val;
+          if (!cDoc.containsKey(uniqueKeyFieldName)) {
+            cDoc.setField(uniqueKeyFieldName, generateChildUniqueId(requireParentId(doc), fieldName, sChildNum));
+          }
+          final String lastKeyPath = fieldName + NUM_SEP_CHAR + sChildNum;
+          // concat of all paths children.grandChild => children#1/grandChild#
+          final String childDocPath = fullPath == null ? lastKeyPath : fullPath + PATH_SEP_CHAR + lastKeyPath;
+          processChildDoc(cDoc, doc, childDocPath);
+          ++childNum;
+        }
+      }
+    }
+
+    /** Sets the configured nest fields on {@code sdoc}, then descends into its own children. */
+    private void processChildDoc(SolrInputDocument sdoc, SolrInputDocument parent, String fullPath) {
+      if (storePath) {
+        setPathField(sdoc, fullPath);
+      }
+      if (storeParent) {
+        setParentKey(sdoc, parent);
+      }
+      processDocChildren(sdoc, fullPath);
+    }
+
+    /**
+     * Returns the first uniqueKey value of {@code parent} as a string.
+     *
+     * @throws SolrException (BAD_REQUEST) if the parent has no uniqueKey value, since a child id
+     *         cannot be derived from it; this replaces the bare NullPointerException that the
+     *         unguarded dereference would otherwise raise
+     */
+    private String requireParentId(SolrInputDocument parent) {
+      final Object parentId = parent.getFieldValue(uniqueKeyFieldName);
+      if (parentId == null) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to generate '" + uniqueKeyFieldName
+            + "' for a nested document: its parent document has no '" + uniqueKeyFieldName + "' value");
+      }
+      return parentId.toString();
+    }
+
+    private String generateChildUniqueId(String parentId, String childKey, String childNum) {
+      // combines parentId with the child's key and childNum. e.g. "10/footnote#1"
+      return parentId + PATH_SEP_CHAR + childKey + NUM_SEP_CHAR + childNum;
+    }
+
+    private void setParentKey(SolrInputDocument sdoc, SolrInputDocument parent) {
+      sdoc.setField(IndexSchema.NEST_PARENT_FIELD_NAME, parent.getFieldValue(uniqueKeyFieldName));
+    }
+
+    private void setPathField(SolrInputDocument sdoc, String fullPath) {
+      sdoc.setField(IndexSchema.NEST_PATH_FIELD_NAME, fullPath);
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f3e8180b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
index 80d19e9..5ca9529 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
@@ -565,6 +565,9 @@
   <field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
   <!-- points to the root document of a block of nested documents -->
   <field name="_root_" type="string" indexed="true" stored="true"/>
+  <!-- required for NestedUpdateProcessor -->
+  <field name="_NEST_PARENT_" type="string" indexed="true" stored="true"/>
+  <field name="_NEST_PATH_" type="string" indexed="true" stored="true"/>
 
   <field name="multi_int_with_docvals" type="tint" multiValued="true" docValues="true" indexed="false"/>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f3e8180b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
index 4113bd1..f22354e 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
@@ -30,6 +30,11 @@
   <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
   <schemaFactory class="ClassicIndexSchemaFactory"/>
 
+  <updateRequestProcessorChain name="nested">
+    <processor class="solr.NestedUpdateProcessorFactory" />
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
   <updateRequestProcessorChain name="comprehensive">
     <processor class="solr.FieldLengthUpdateProcessorFactory">
       <arr name="typeClass">

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f3e8180b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java
new file mode 100644
index 0000000..ab36f0e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update;
+
+import java.util.List;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.update.processor.NestedUpdateProcessorFactory;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/**
+ * Tests for {@link NestedUpdateProcessorFactory}: nest path / nest parent population, generated
+ * child ids, and rejection of child field names that contain the path separator.
+ */
+public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
+
+  private static final char PATH_SEP_CHAR = '/';
+  private static final char NUM_SEP_CHAR = '#';
+  private static final String SINGLE_VAL_CHAR = "";
+  private static final String grandChildId = "4";
+  private static final String secondChildList = "anotherChildList";
+  private static final String jDoc = "{\n" +
+      "    \"add\": {\n" +
+      "        \"doc\": {\n" +
+      "            \"id\": \"1\",\n" +
+      "            \"children\": [\n" +
+      "                {\n" +
+      "                    \"id\": \"2\",\n" +
+      "                    \"foo_s\": \"Yaz\"\n" +
+      "                    \"grandChild\": \n" +
+      "                          {\n" +
+      "                             \"id\": \""+ grandChildId + "\",\n" +
+      "                             \"foo_s\": \"Jazz\"\n" +
+      "                          },\n" +
+      "                },\n" +
+      "                {\n" +
+      "                    \"id\": \"3\",\n" +
+      "                    \"foo_s\": \"Bar\"\n" +
+      "                }\n" +
+      "            ]\n" +
+                   secondChildList + ": [{\"id\": \"4\", \"last_s\": \"Smith\"}],\n" +
+      "        }\n" +
+      "    }\n" +
+      "}";
+
+  private static final String errDoc = "{\n" +
+      "    \"add\": {\n" +
+      "        \"doc\": {\n" +
+      "            \"id\": \"1\",\n" +
+      "            \"children" + PATH_SEP_CHAR + "a\": [\n" +
+      "                {\n" +
+      "                    \"id\": \"2\",\n" +
+      "                    \"foo_s\": \"Yaz\"\n" +
+      "                    \"grandChild\": \n" +
+      "                          {\n" +
+      "                             \"id\": \""+ grandChildId + "\",\n" +
+      "                             \"foo_s\": \"Jazz\"\n" +
+      "                          },\n" +
+      "                },\n" +
+      "                {\n" +
+      "                    \"id\": \"3\",\n" +
+      "                    \"foo_s\": \"Bar\"\n" +
+      "                }\n" +
+      "            ]\n" +
+      "        }\n" +
+      "    }\n" +
+      "}";
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-update-processor-chains.xml", "schema15.xml");
+  }
+
+  /** Empties the index so each test only sees the documents it adds itself. */
+  @Before
+  public void before() throws Exception {
+    assertU(delQ("*:*"));
+    assertU(commit());
+  }
+
+  /** The single-valued grandchild is indexed with the nest path {@code children#0/grandChild#}. */
+  @Test
+  public void testDeeplyNestedURPGrandChild() throws Exception {
+    final String[] tests = {
+        "/response/docs/[0]/id=='4'",
+        "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0/grandChild#'"
+    };
+    indexSampleData(jDoc);
+
+    assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":*/grandChild#*",
+        "fl","*",
+        "sort","id desc",
+        "wt","json"),
+        tests);
+  }
+
+  /** Members of a child list are indexed with numbered nest paths ({@code children#0}, ...). */
+  @Test
+  public void testDeeplyNestedURPChildren() throws Exception {
+    final String[] childrenTests = {
+        "/response/docs/[0]/id=='2'",
+        "/response/docs/[1]/id=='3'",
+        "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0'",
+        "/response/docs/[1]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#1'"
+    };
+    indexSampleData(jDoc);
+
+    assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":children#?",
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        childrenTests);
+
+    assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":anotherChildList#?",
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        "/response/docs/[0]/id=='4'",
+        "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='anotherChildList#0'");
+  }
+
+  /** Runs the processor directly (no indexing) and checks the fields it adds to each child. */
+  @Test
+  public void testDeeplyNestedURPSanity() throws Exception {
+    SolrInputDocument docHierarchy = sdoc("id", "1",
+        "children", sdocs(
+            sdoc("id", "2", "name_s", "Yaz"),
+            sdoc("id", "3", "name_s", "Jazz", "grandChild", sdoc("id", "4", "name_s", "Gaz"))),
+        "lonelyChild", sdoc("id", "5", "name_s", "Loner"));
+    UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null);
+    AddUpdateCommand cmd = new AddUpdateCommand(req());
+    cmd.solrDoc = docHierarchy;
+    nestedUpdate.processAdd(cmd);
+    cmd.clear();
+
+    List<?> children = (List<?>) docHierarchy.get("children").getValues();
+
+    SolrInputDocument firstChild = (SolrInputDocument) children.get(0);
+    assertEquals("SolrInputDocument(fields: [id=2, name_s=Yaz, _NEST_PATH_=children#0, _NEST_PARENT_=1])",
+        firstChild.toString());
+
+    SolrInputDocument secondChild = (SolrInputDocument) children.get(1);
+    assertEquals("SolrInputDocument(fields: [id=3, name_s=Jazz, grandChild=SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3]), _NEST_PATH_=children#1, _NEST_PARENT_=1])",
+        secondChild.toString());
+
+    SolrInputDocument grandChild = (SolrInputDocument) ((SolrInputDocument) children.get(1)).get("grandChild").getValue();
+    assertEquals("SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3])",
+        grandChild.toString());
+
+    SolrInputDocument singularChild = (SolrInputDocument) docHierarchy.get("lonelyChild").getValue();
+    assertEquals("SolrInputDocument(fields: [id=5, name_s=Loner, _NEST_PATH_=lonelyChild#, _NEST_PARENT_=1])",
+        singularChild.toString());
+  }
+
+  /** Children without an id receive one derived from the parent's id and their own nest key. */
+  @Test
+  public void testDeeplyNestedURPChildrenWoId() throws Exception {
+    final String rootId = "1";
+    final String childKey = "grandChild";
+    final String expectedId = rootId + "/children#1/" + childKey + NUM_SEP_CHAR + SINGLE_VAL_CHAR;
+    SolrInputDocument noIdChildren = sdoc("id", rootId,
+        "children", sdocs(
+            sdoc("name_s", "Yaz"),
+            sdoc("name_s", "Jazz", childKey, sdoc("name_s", "Gaz"))));
+    UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null);
+    AddUpdateCommand cmd = new AddUpdateCommand(req());
+    cmd.solrDoc = noIdChildren;
+    nestedUpdate.processAdd(cmd);
+    cmd.clear();
+    List<?> children = (List<?>) noIdChildren.get("children").getValues();
+    SolrInputDocument idLessChild = (SolrInputDocument) ((SolrInputDocument) children.get(1)).get(childKey).getValue();
+    assertTrue("Id less child did not get an Id", idLessChild.containsKey("id"));
+    assertEquals("Id less child was assigned an unexpected id", expectedId, idLessChild.getFieldValue("id").toString());
+  }
+
+  /** A child field whose name contains the path separator is rejected with a SolrException. */
+  @Test
+  public void testDeeplyNestedURPFieldNameException() throws Exception {
+    final String errMsg = "contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP";
+    // both expectations must be registered before the call that throws; anything placed after
+    // that call is never executed, which would leave the message unchecked
+    thrown.expect(SolrException.class);
+    thrown.expectMessage(errMsg);
+    indexSampleData(errDoc);
+  }
+
+  /** Sends the JSON update command through the "nested" update chain and commits. */
+  private void indexSampleData(String cmd) throws Exception {
+    updateJ(cmd, params("update.chain", "nested"));
+    assertU(commit());
+  }
+}


Mime
View raw message