lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject lucene-solr:branch_7x: SOLR-12361: Allow nested child documents to be in field values of a SolrInputDocument. * AddUpdateCommand and its relationship with DirectUpdateHandler2 was reworked substantially.
Date Mon, 11 Jun 2018 14:54:12 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x ac196a996 -> 774416290


SOLR-12361: Allow nested child documents to be in field values of a SolrInputDocument.
* AddUpdateCommand and its relationship with DirectUpdateHandler2 was reworked substantially.

Fixes #385

(cherry picked from commit 8095139)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/77441629
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/77441629
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/77441629

Branch: refs/heads/branch_7x
Commit: 7744162905e3d9c613262c23c242d238596d058f
Parents: ac196a9
Author: David Smiley <dsmiley@apache.org>
Authored: Mon Jun 11 10:48:30 2018 -0400
Committer: David Smiley <dsmiley@apache.org>
Committed: Mon Jun 11 10:54:07 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   9 +-
 .../handler/component/RealTimeGetComponent.java |  11 +-
 .../org/apache/solr/schema/IndexSchema.java     |   3 +-
 .../apache/solr/update/AddUpdateCommand.java    | 204 ++++++++++---------
 .../solr/update/DirectUpdateHandler2.java       |  97 ++++-----
 .../org/apache/solr/update/DocumentBuilder.java |  37 +++-
 .../IgnoreLargeDocumentProcessorFactory.java    |  39 ++--
 .../apache/solr/update/AddBlockUpdateTest.java  | 173 ++++++++++++++--
 ...IgnoreLargeDocumentProcessorFactoryTest.java |  97 ++++++++-
 .../org/apache/solr/common/SolrDocument.java    |   1 -
 .../apache/solr/common/SolrDocumentBase.java    |  10 +
 .../apache/solr/common/SolrInputDocument.java   |   8 +-
 .../java/org/apache/solr/SolrTestCaseJ4.java    |  27 ++-
 13 files changed, 510 insertions(+), 206 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c6a5636..929e908 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -61,7 +61,11 @@ Optimizations
 Other Changes
 ----------------------
 
-(no changes)
+* SOLR-12361: Allow nested child documents to be in field values of a SolrInputDocument as an alternative to
+  add/get ChildDocuments off to the side.  The latter is now referred to as "anonymous" child documents as opposed to
+  "labelled" (by the field name).  Anonymous child docs might be deprecated in the future.
+  This is an internal change not yet plumbed into /update formats.
+  AddUpdateCommand and it's relationship with DirectUpdateHandler2 was reworked substantially. (Moshe Bla, David Smiley)
 
 ==================  7.4.0 ==================
 
@@ -93,6 +97,7 @@ Upgrade Notes
 
 New Features
 ----------------------
+
 * SOLR-12396: Upgrade Carrot2 to 3.16.0, HPPC to 0.8.1, morfologik to 2.1.5. (Dawid Weiss)
 
 * SOLR-11200: A new CMS config option 'ioThrottle' to manually enable/disable
@@ -190,6 +195,7 @@ New Features
 
 Bug Fixes
 ----------------------
+
 * SOLR-5351: Fixed More Like This Handler to use all fields provided in mlt.fl when used with
   content stream. The similarity is calculated between the content stream's value and all
   fields listed in mlt.fl. (Dawid Weiss)
@@ -540,6 +546,7 @@ Upgrade Notes
 
 New Features
 ----------------------
+
 * SOLR-11285: Simulation framework for autoscaling. (ab)
 
 * LUCENE-2899: In the Solr analysis-extras contrib, added support for the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index ca59d49..ca78230 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -16,10 +16,6 @@
  */
 package org.apache.solr.handler.component;
 
-import static org.apache.solr.common.params.CommonParams.DISTRIB;
-import static org.apache.solr.common.params.CommonParams.ID;
-import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
-
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
@@ -89,6 +85,10 @@ import org.apache.solr.util.TestInjection;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.common.params.CommonParams.DISTRIB;
+import static org.apache.solr.common.params.CommonParams.ID;
+import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
+
 public class RealTimeGetComponent extends SearchComponent
 {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -771,8 +771,9 @@ public class RealTimeGetComponent extends SearchComponent
    * @lucene.experimental
    */
   public static SolrDocument toSolrDoc(SolrInputDocument sdoc, IndexSchema schema) {
+    // TODO what about child / nested docs?
     // TODO: do something more performant than this double conversion
-    Document doc = DocumentBuilder.toDocument(sdoc, schema, false);
+    Document doc = DocumentBuilder.toDocument(sdoc, schema);
 
     // copy the stored fields only
     Document out = new Document();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
index c77b572..e262c84 100644
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@@ -1938,7 +1938,8 @@ public class IndexSchema {
    * @lucene.internal
    */
   public boolean isUsableForChildDocs() {
-    FieldType rootType = getFieldType(ROOT_FIELD_NAME);
+    //TODO make this boolean a field so it needn't be looked up each time?
+    FieldType rootType = getFieldTypeNoEx(ROOT_FIELD_NAME);
     return (null != uniqueKeyFieldType &&
             null != rootType &&
             rootType.getTypeName().equals(uniqueKeyFieldType.getTypeName()));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
index 3bfe934..cfa937e 100644
--- a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
+++ b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
@@ -17,7 +17,7 @@
 package org.apache.solr.update;
 
 import java.util.ArrayList;
-import java.util.Iterator;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
@@ -33,38 +33,43 @@ import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 
 /**
- *
+ * An {@link UpdateCommand} for adding or updating one document.  Technically more than one Lucene documents
+ * may be involved in the event of nested documents.
  */
-public class AddUpdateCommand extends UpdateCommand implements Iterable<Document> {
-   // optional id in "internal" indexed form... if it is needed and not supplied,
-   // it will be obtained from the doc.
-   private BytesRef indexedId;
-
-   /**
-    * Higher level SolrInputDocument, normally used to construct the Lucene Document
-    * to index.
-    */
-   public SolrInputDocument solrDoc;
-
-   /**
-    * This is the version of a document, previously indexed, on which the current
-    * update depends on. This version could be that of a previous in-place update
-    * or a full update. A negative value here, e.g. -1, indicates that this add
-    * update does not depend on a previous update.
-    */
-   public long prevVersion = -1;
-
-   public boolean overwrite = true;
-   
-   public Term updateTerm;
-
-   public int commitWithin = -1;
-
-   public boolean isLastDocInBatch = false;
-   
-   public AddUpdateCommand(SolrQueryRequest req) {
-     super(req);
-   }
+public class AddUpdateCommand extends UpdateCommand {
+
+  /**
+   * Higher level SolrInputDocument, normally used to construct the Lucene Document(s)
+   * to index.
+   */
+  public SolrInputDocument solrDoc;
+
+  /**
+   * This is the version of a document, previously indexed, on which the current
+   * update depends on. This version could be that of a previous in-place update
+   * or a full update. A negative value here, e.g. -1, indicates that this add
+   * update does not depend on a previous update.
+   */
+  public long prevVersion = -1;
+
+  public boolean overwrite = true;
+
+  /**
+   * The term to use to delete an existing document (for dedupe). (optional)
+   */
+  public Term updateTerm;
+
+  public int commitWithin = -1;
+
+  public boolean isLastDocInBatch = false;
+
+  // optional id in "internal" indexed form... if it is needed and not supplied,
+  // it will be obtained from the doc.
+  private BytesRef indexedId;
+
+  public AddUpdateCommand(SolrQueryRequest req) {
+    super(req);
+  }
 
   @Override
   public String name() {
@@ -85,20 +90,16 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
      return solrDoc;
    }
 
-  /** Creates and returns a lucene Document to index.  Any changes made to the returned Document
-   * will not be reflected in the SolrInputDocument, or future calls to this method. This defaults
-   * to false for the inPlaceUpdate parameter of {@link #getLuceneDocument(boolean)}.
+  /**
+   * Creates and returns a lucene Document to index.
+   * Nested documents, if found, will cause an exception to be thrown.  Call {@link #getLuceneDocsIfNested()} for that.
+   * Any changes made to the returned Document will not be reflected in the SolrInputDocument, or future calls to this
+   * method.
+   * Note that the behavior of this is sensitive to {@link #isInPlaceUpdate()}.
    */
    public Document getLuceneDocument() {
-     return getLuceneDocument(false);
-   }
-
-   /** Creates and returns a lucene Document to index.  Any changes made to the returned Document
-    * will not be reflected in the SolrInputDocument, or future calls to this method.
-    * @param inPlaceUpdate Whether this document will be used for in-place updates.
-    */
-   public Document getLuceneDocument(boolean inPlaceUpdate) {
-     return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema(), inPlaceUpdate);
+     final boolean ignoreNestedDocs = false; // throw an exception if found
+     return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema(), isInPlaceUpdate(), ignoreNestedDocs);
    }
 
   /** Returns the indexed ID for this document.  The returned BytesRef is retained across multiple calls, and should not be modified. */
@@ -150,13 +151,12 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
    * @return String id to hash
    */
   public String getHashableId() {
-    String id = null;
     IndexSchema schema = req.getSchema();
     SchemaField sf = schema.getUniqueKeyField();
     if (sf != null) {
       if (solrDoc != null) {
         SolrInputField field = solrDoc.getField(sf.getName());
-        
+
         int count = field == null ? 0 : field.getValueCount();
         if (count == 0) {
           if (overwrite) {
@@ -172,72 +172,94 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
         }
       }
     }
-    return id;
+    return null;
   }
 
-  public boolean isBlock() {
-    return solrDoc.hasChildDocuments();
-  }
+  /**
+   * Computes the final flattened Solr docs that are ready to be converted to Lucene docs.  If no flattening is
+   * performed then we return null, and the caller ought to use {@link #getLuceneDocument()} instead.
+   * This should only be called once.
+   * Any changes made to the returned Document(s) will not be reflected in the SolrInputDocument,
+   * or future calls to this method.
+   */
+  public Iterable<Document> getLuceneDocsIfNested() {
+    assert ! isInPlaceUpdate() : "We don't expect this to happen."; // but should "work"?
+    if (!req.getSchema().isUsableForChildDocs()) {
+      // note if the doc is nested despite this, we'll throw an exception elsewhere
+      return null;
+    }
 
-  @Override
-  public Iterator<Document> iterator() {
-    return new Iterator<Document>() {
-      Iterator<SolrInputDocument> iter;
+    List<SolrInputDocument> all = flatten(solrDoc);
+    if (all.size() <= 1) {
+      return null; // caller should call getLuceneDocument() instead
+    }
 
-      {
-        List<SolrInputDocument> all = flatten(solrDoc);
+    String rootId = getHashableId();
 
-        String idField = getHashableId();
+    boolean isVersion = version != 0;
 
-        boolean isVersion = version != 0;
+    for (SolrInputDocument sdoc : all) {
+      sdoc.setField(IndexSchema.ROOT_FIELD_NAME, rootId);
+      if(isVersion) sdoc.setField(CommonParams.VERSION_FIELD, version);
+      // TODO: if possible concurrent modification exception (if SolrInputDocument not cloned and is being forwarded to replicas)
+      // then we could add this field to the generated lucene document instead.
+    }
 
-        for (SolrInputDocument sdoc : all) {
-          sdoc.setField(IndexSchema.ROOT_FIELD_NAME, idField);
-          if(isVersion) sdoc.setField(CommonParams.VERSION_FIELD, version);
-          // TODO: if possible concurrent modification exception (if SolrInputDocument not cloned and is being forwarded to replicas)
-          // then we could add this field to the generated lucene document instead.
-        }
+    return () -> all.stream().map(sdoc -> DocumentBuilder.toDocument(sdoc, req.getSchema())).iterator();
+  }
 
-        iter = all.iterator();
-     }
+  private List<SolrInputDocument> flatten(SolrInputDocument root) {
+    List<SolrInputDocument> unwrappedDocs = new ArrayList<>();
+    flattenAnonymous(unwrappedDocs, root, true);
+    flattenLabelled(unwrappedDocs, root, true);
+    unwrappedDocs.add(root);
 
-      @Override
-      public boolean hasNext() {
-        return iter.hasNext();
-      }
+    return unwrappedDocs;
+  }
 
-      @Override
-      public Document next() {
-        return DocumentBuilder.toDocument(iter.next(), req.getSchema());
+  /** Extract all child documents from parent that are saved in fields */
+  private void flattenLabelled(List<SolrInputDocument> unwrappedDocs, SolrInputDocument currentDoc, boolean isRoot) {
+    for (SolrInputField field: currentDoc.values()) {
+      Object value = field.getFirstValue();
+      // check if value is a childDocument
+      if (value instanceof SolrInputDocument) {
+        Object val = field.getValue();
+        if (!(val instanceof Collection)) {
+          flattenLabelled(unwrappedDocs, ((SolrInputDocument) val));
+          continue;
+        }
+        Collection<SolrInputDocument> childrenList = ((Collection) val);
+        for (SolrInputDocument child : childrenList) {
+          flattenLabelled(unwrappedDocs, child);
+        }
       }
+    }
 
-      @Override
-      public void remove() {
-        throw new UnsupportedOperationException();
-      }
-    };
+    if (!isRoot) unwrappedDocs.add(currentDoc);
   }
 
-  private List<SolrInputDocument> flatten(SolrInputDocument root) {
-    List<SolrInputDocument> unwrappedDocs = new ArrayList<>();
-    recUnwrapp(unwrappedDocs, root);
-    if (1 < unwrappedDocs.size() && ! req.getSchema().isUsableForChildDocs()) {
-      throw new SolrException
-        (SolrException.ErrorCode.BAD_REQUEST, "Unable to index docs with children: the schema must " +
-         "include definitions for both a uniqueKey field and the '" + IndexSchema.ROOT_FIELD_NAME +
-         "' field, using the exact same fieldType");
+  private void flattenLabelled(List<SolrInputDocument> unwrappedDocs, SolrInputDocument currentDoc) {
+    if(currentDoc.hasChildDocuments()) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          "Anonymous child docs can only hang from others or the root: " + currentDoc);
     }
-    return unwrappedDocs;
+    flattenLabelled(unwrappedDocs, currentDoc, false);
   }
 
-  private void recUnwrapp(List<SolrInputDocument> unwrappedDocs, SolrInputDocument currentDoc) {
+  /** Extract all anonymous child documents from parent. */
+  private void flattenAnonymous(List<SolrInputDocument> unwrappedDocs, SolrInputDocument currentDoc, boolean isRoot) {
     List<SolrInputDocument> children = currentDoc.getChildDocuments();
     if (children != null) {
       for (SolrInputDocument child : children) {
-        recUnwrapp(unwrappedDocs, child);
+        flattenAnonymous(unwrappedDocs, child);
       }
     }
-    unwrappedDocs.add(currentDoc);
+
+    if(!isRoot) unwrappedDocs.add(currentDoc);
+  }
+
+  private void flattenAnonymous(List<SolrInputDocument> unwrappedDocs, SolrInputDocument currentDoc) {
+    flattenAnonymous(unwrappedDocs, currentDoc, false);
   }
 
   @Override
@@ -252,7 +274,7 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
 
   /**
    * Is this add update an in-place update? An in-place update is one where only docValues are
-   * updated, and a new docment is not indexed.
+   * updated, and a new document is not indexed.
    */
   public boolean isInPlaceUpdate() {
     return (prevVersion >= 0);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
index 0e9052f..48c178e 100644
--- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
+++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@@ -36,7 +36,6 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SlowCodecReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -315,9 +314,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
       IndexWriter writer = iw.get();
-
-      if (cmd.isBlock()) {
-        writer.addDocuments(cmd);
+      Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested();
+      if (blockDocs != null) {
+        writer.addDocuments(blockDocs);
       } else {
         writer.addDocument(cmd.getLuceneDocument());
       }
@@ -330,31 +329,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
   }
 
   private void doNormalUpdate(AddUpdateCommand cmd) throws IOException {
-    Term updateTerm;
-    Term idTerm = getIdTerm(cmd);
-    boolean del = false;
-    if (cmd.updateTerm == null) {
-      updateTerm = idTerm;
-    } else {
-      // this is only used by the dedup update processor
-      del = true;
-      updateTerm = cmd.updateTerm;
-    }
-
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
       IndexWriter writer = iw.get();
 
-      updateDocOrDocValues(cmd, writer, updateTerm);
-
-      if (del) { // ensure id remains unique
-        BooleanQuery.Builder bq = new BooleanQuery.Builder();
-        bq.add(new BooleanClause(new TermQuery(updateTerm),
-            Occur.MUST_NOT));
-        bq.add(new BooleanClause(new TermQuery(idTerm), Occur.MUST));
-        writer.deleteDocuments(new DeleteByQueryWrapper(bq.build(), core.getLatestSchema()));
-      }
-
+      updateDocOrDocValues(cmd, writer);
 
       // Add to the transaction log *after* successfully adding to the
       // index, if there was no error.
@@ -368,13 +347,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     } finally {
       iw.decref();
     }
-
-
-
   }
 
   private void addAndDelete(AddUpdateCommand cmd, List<UpdateLog.DBQ> deletesAfter) throws IOException {
-
+    // this logic is different enough from doNormalUpdate that it's separate
     log.info("Reordered DBQs detected.  Update=" + cmd + " DBQs="
         + deletesAfter);
     List<Query> dbqList = new ArrayList<>(deletesAfter.size());
@@ -389,15 +365,13 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
       }
     }
 
-    Term idTerm = getIdTerm(cmd);
-
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
       IndexWriter writer = iw.get();
 
       // see comment in deleteByQuery
       synchronized (solrCoreState.getUpdateLock()) {
-        updateDocOrDocValues(cmd, writer, idTerm);
+        updateDocOrDocValues(cmd, writer);
 
         if (cmd.isInPlaceUpdate() && ulog != null) {
           ulog.openRealtimeSearcher(); // This is needed due to LUCENE-7344.
@@ -413,10 +387,6 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
 
   }
 
-  private Term getIdTerm(AddUpdateCommand cmd) {
-    return new Term(cmd.isBlock() ? IndexSchema.ROOT_FIELD_NAME : idField.getName(), cmd.getIndexedId());
-  }
-
   private void updateDeleteTrackers(DeleteUpdateCommand cmd) {
     if ((cmd.getFlags() & UpdateCommand.IGNORE_AUTOCOMMIT) == 0) {
       if (commitWithinSoftCommit) {
@@ -938,7 +908,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
   }
 
   /**
-   * Calls either {@link IndexWriter#updateDocValues} or {@link IndexWriter#updateDocument} as 
+   * Calls either {@link IndexWriter#updateDocValues} or {@link IndexWriter#updateDocument}(s) as
    * needed based on {@link AddUpdateCommand#isInPlaceUpdate}.
    * <p>
    * If the this is an UPDATE_INPLACE cmd, then all fields included in 
@@ -948,38 +918,53 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
    *
    * @param cmd - cmd apply to IndexWriter
    * @param writer - IndexWriter to use
-   * @param updateTerm - used if this cmd results in calling {@link IndexWriter#updateDocument}
    */
-  private void updateDocOrDocValues(AddUpdateCommand cmd, IndexWriter writer, Term updateTerm) throws IOException {
-    assert null != cmd;
-    final SchemaField uniqueKeyField = cmd.req.getSchema().getUniqueKeyField();
-    final String uniqueKeyFieldName = null == uniqueKeyField ? null : uniqueKeyField.getName();
+  private void updateDocOrDocValues(AddUpdateCommand cmd, IndexWriter writer) throws IOException {
+    assert idField != null; // this code path requires an idField in order to potentially replace a doc
+    boolean hasUpdateTerm = cmd.updateTerm != null; // AKA dedupe
 
     if (cmd.isInPlaceUpdate()) {
-      Document luceneDocument = cmd.getLuceneDocument(true);
+      if (hasUpdateTerm) {
+        throw new IllegalStateException("cmd.updateTerm/dedupe is not compatible with in-place updates");
+      }
+      // we don't support the solrInputDoc with nested child docs either but we'll throw an exception if attempted
+
+      Term updateTerm = new Term(idField.getName(), cmd.getIndexedId());
+      Document luceneDocument = cmd.getLuceneDocument();
 
       final List<IndexableField> origDocFields = luceneDocument.getFields();
       final List<Field> fieldsToUpdate = new ArrayList<>(origDocFields.size());
       for (IndexableField field : origDocFields) {
-        if (! field.name().equals(uniqueKeyFieldName) ) {
+        if (! field.name().equals(updateTerm.field()) ) {
           fieldsToUpdate.add((Field)field);
         }
       }
       log.debug("updateDocValues({})", cmd);
       writer.updateDocValues(updateTerm, fieldsToUpdate.toArray(new Field[fieldsToUpdate.size()]));
-    } else {
-      updateDocument(cmd, writer, updateTerm);
-    }
-  }
 
-  private void updateDocument(AddUpdateCommand cmd, IndexWriter writer, Term updateTerm) throws IOException {
-    if (cmd.isBlock()) {
-      log.debug("updateDocuments({})", cmd);
-      writer.updateDocuments(updateTerm, cmd);
-    } else {
-      Document luceneDocument = cmd.getLuceneDocument(false);
-      log.debug("updateDocument({})", cmd);
-      writer.updateDocument(updateTerm, luceneDocument);
+    } else { // more normal path
+
+      Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested();
+      boolean isBlock = blockDocs != null; // AKA nested child docs
+      Term idTerm = new Term(isBlock ? IndexSchema.ROOT_FIELD_NAME : idField.getName(), cmd.getIndexedId());
+      Term updateTerm = hasUpdateTerm ? cmd.updateTerm : idTerm;
+      if (isBlock) {
+        log.debug("updateDocuments({})", cmd);
+        writer.updateDocuments(updateTerm, blockDocs);
+      } else {
+        Document luceneDocument = cmd.getLuceneDocument();
+        log.debug("updateDocument({})", cmd);
+        writer.updateDocument(updateTerm, luceneDocument);
+      }
+
+      // If hasUpdateTerm, then delete any existing documents with the same ID other than the one added above
+      //   (used in near-duplicate replacement)
+      if (hasUpdateTerm) { // rare
+        BooleanQuery.Builder bq = new BooleanQuery.Builder();
+        bq.add(new TermQuery(updateTerm), Occur.MUST_NOT); //don't want the one we added above (will be unique)
+        bq.add(new TermQuery(idTerm), Occur.MUST); // same ID
+        writer.deleteDocuments(new DeleteByQueryWrapper(bq.build(), core.getLatestSchema()));
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
index 58638ae..8fc5541 100644
--- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
@@ -25,6 +25,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrDocumentBase;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
@@ -94,11 +95,11 @@ public class DocumentBuilder {
   }
 
   /**
-   * @see DocumentBuilder#toDocument(SolrInputDocument, IndexSchema, boolean)
+   * @see DocumentBuilder#toDocument(SolrInputDocument, IndexSchema, boolean, boolean)
    */
   public static Document toDocument( SolrInputDocument doc, IndexSchema schema )
   {
-    return toDocument(doc, schema, false);
+    return toDocument(doc, schema, false, true);
   }
   
   /**
@@ -119,11 +120,14 @@ public class DocumentBuilder {
    * @param schema Schema instance
    * @param forInPlaceUpdate Whether the output document would be used for an in-place update or not. When this is true,
    *        default fields values and copy fields targets are not populated.
+   * @param ignoreNestedDocs if nested child documents should be ignored.  If false then an exception will be thrown.
    * @return Built Lucene document
-
    */
-  public static Document toDocument( SolrInputDocument doc, IndexSchema schema, boolean forInPlaceUpdate )
-  {
+  public static Document toDocument(SolrInputDocument doc, IndexSchema schema, boolean forInPlaceUpdate, boolean ignoreNestedDocs) {
+    if (!ignoreNestedDocs && doc.hasChildDocuments()) {
+      throw unexpectedNestedDocException(schema, forInPlaceUpdate);
+    }
+
     final SchemaField uniqueKeyField = schema.getUniqueKeyField();
     final String uniqueKeyFieldName = null == uniqueKeyField ? null : uniqueKeyField.getName();
     
@@ -132,6 +136,14 @@ public class DocumentBuilder {
     
     // Load fields from SolrDocument to Document
     for( SolrInputField field : doc ) {
+
+      if (field.getFirstValue() instanceof SolrDocumentBase) {
+        if (ignoreNestedDocs) {
+          continue;
+        }
+        throw unexpectedNestedDocException(schema, forInPlaceUpdate);
+      }
+
       String name = field.getName();
       SchemaField sfield = schema.getFieldOrNull(name);
       boolean used = false;
@@ -243,6 +255,21 @@ public class DocumentBuilder {
     return out;
   }
 
+  private static SolrException unexpectedNestedDocException(IndexSchema schema, boolean forInPlaceUpdate) {
+    if (! schema.isUsableForChildDocs()) {
+      return new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          "Unable to index docs with children: the schema must " +
+              "include definitions for both a uniqueKey field and the '" + IndexSchema.ROOT_FIELD_NAME +
+              "' field, using the exact same fieldType");
+    } else if (forInPlaceUpdate) {
+      return new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          "Unable to index docs with children: for an in-place update, just provide the doc by itself");
+    } else {
+      return new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+          "A document unexpectedly contained nested child documents");
+    }
+  }
+
   /** Move the largest stored field last, because Lucene can avoid loading that one if it's not needed. */
   private static void moveLargestFieldLast(Document doc) {
     String largestField = null;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/java/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactory.java
index 1782438..3354ca6 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactory.java
@@ -24,6 +24,7 @@ import java.util.Map;
 
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
@@ -63,7 +64,7 @@ public class IgnoreLargeDocumentProcessorFactory extends UpdateRequestProcessorF
     return new UpdateRequestProcessor(next) {
       @Override
       public void processAdd(AddUpdateCommand cmd) throws IOException {
-        long docSize = ObjectSizeEstimator.fastEstimate(cmd.getSolrInputDocument());
+        long docSize = ObjectSizeEstimator.estimate(cmd.getSolrInputDocument());
         if (docSize / 1024 > maxDocumentSize) {
           throw new SolrException(BAD_REQUEST, "Size of the document "+cmd.getPrintableId()+" is too large, around:"+docSize);
         }
@@ -108,37 +109,36 @@ public class IgnoreLargeDocumentProcessorFactory extends UpdateRequestProcessorF
       primitiveSizes.put(Long.class, Long.BYTES);
     }
 
-    static long fastEstimate(SolrInputDocument doc) {
+    static long estimate(SolrInputDocument doc) {
       if (doc == null) return 0L;
       long size = 0;
-      if (doc.getFieldNames() != null) {
-        for (String fieldName : doc.getFieldNames()) {
-          size += fastEstimate(fieldName) + fastEstimate(doc.getField(fieldName).getValue());
-        }
+      for (SolrInputField inputField : doc.values()) {
+        size += primitiveEstimate(inputField.getName(), 0L);
+        size += estimate(inputField.getValue());
       }
+
       if (doc.hasChildDocuments()) {
         for (SolrInputDocument childDoc : doc.getChildDocuments()) {
-          size += fastEstimate(childDoc);
+          size += estimate(childDoc);
         }
       }
       return size;
     }
 
-    static long fastEstimate(Object obj) {
-      if (obj == null) return 0;
-
-      long size = primitiveEstimate(obj, -1);
-      if (size != -1) return size;
+    static long estimate(Object obj) {
+      if (obj instanceof SolrInputDocument) {
+        return estimate((SolrInputDocument) obj);
+      }
 
       if (obj instanceof Map) {
-        return fastEstimate((Map) obj);
+        return estimate((Map) obj);
       }
 
       if (obj instanceof Collection) {
-        return fastEstimate((Collection) obj);
+        return estimate((Collection) obj);
       }
 
-      return 0L;
+      return primitiveEstimate(obj, 0L);
     }
 
     private static long primitiveEstimate(Object obj, long def) {
@@ -152,20 +152,21 @@ public class IgnoreLargeDocumentProcessorFactory extends UpdateRequestProcessorF
       return def;
     }
 
-    private static long fastEstimate(Map<Object, Object> map) {
+    private static long estimate(Map<Object, Object> map) {
       if (map.isEmpty()) return 0;
       long size = 0;
       for (Map.Entry<Object, Object> entry : map.entrySet()) {
-        size += primitiveEstimate(entry.getKey(), 0L) + primitiveEstimate(entry.getValue(), 0L);
+        size += primitiveEstimate(entry.getKey(), 0L);
+        size += estimate(entry.getValue());
       }
       return size;
     }
 
-    private static long fastEstimate(Collection collection) {
+    private static long estimate(Collection collection) {
       if (collection.isEmpty()) return 0;
       long size = 0;
       for (Object obj : collection) {
-        size += primitiveEstimate(obj, 0L);
+        size += estimate(obj);
       }
       return size;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
index cbd91c6..0c36e15 100644
--- a/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
+++ b/solr/core/src/test/org/apache/solr/update/AddBlockUpdateTest.java
@@ -16,6 +16,9 @@
  */
 package org.apache.solr.update;
 
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -26,16 +29,15 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
 import org.apache.commons.io.output.ByteArrayOutputStream;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.TermQuery;
@@ -47,10 +49,14 @@ import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.request.RequestWriter;
 import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.handler.loader.XMLLoader;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.RefCounted;
@@ -61,7 +67,9 @@ import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
@@ -73,7 +81,7 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
   private static final String child = "child_s";
   private static final String parent = "parent_s";
   private static final String type = "type_s";
-  
+
   private static ExecutorService exe;
   private static AtomicInteger counter = new AtomicInteger();
   private static boolean cachedMode;
@@ -82,6 +90,9 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
   
   private RefCounted<SolrIndexSearcher> searcherRef;
   private SolrIndexSearcher _searcher;
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
   
   @BeforeClass
   public static void beforeClass() throws Exception {
@@ -159,13 +170,13 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
                doc(child,"d","id", "66")), "overwrite", "false"));
     
     assertU(commit());
-    
+
     assertQ(req(parent+":"+overwritten, "//*[@numFound='1']"));
     assertQ(req(parent+":"+dubbed, "//*[@numFound='2']"));
-    
+
     final SolrIndexSearcher searcher = getSearcher();
     assertSingleParentOf(searcher, one("ab"), dubbed);
-    
+
     final TopDocs docs = searcher.search(join(one("cd")), 10);
     assertEquals(2, docs.totalHits);
     final String pAct = searcher.doc(docs.scoreDocs[0].doc).get(parent)+
@@ -260,10 +271,96 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
     assertQ(req(parent + ":Y"), "//*[@numFound='0']");
     assertQ(req(parent + ":W"), "//*[@numFound='0']");
   }
-  
+
+  @Test
+  public void testExceptionThrownChildDocWAnonymousChildren() throws Exception {
+    SolrInputDocument document1 = sdoc("id", id(), parent, "X",
+        "child1_s", sdoc("id", id(), "child_s", "y"),
+        "child2_s", sdoc("id", id(), "child_s", "z"));
+
+    SolrInputDocument exceptionChildDoc = (SolrInputDocument) document1.get("child1_s").getValue();
+    addChildren("child", exceptionChildDoc, 0, false);
+
+    thrown.expect(SolrException.class);
+    final String expectedMessage = "Anonymous child docs can only hang from others or the root";
+    thrown.expectMessage(expectedMessage);
+    indexSolrInputDocumentsDirectly(document1);
+  }
+
+  @Test
+  public void testSolrNestedFieldsList() throws Exception {
+
+    final String id1 = id();
+    List<SolrInputDocument> children1 = Arrays.asList(sdoc("id", id(), child, "y"), sdoc("id", id(), child, "z"));
+
+    SolrInputDocument document1 = sdoc("id", id1, parent, "X",
+        "children", children1);
+
+    final String id2 = id();
+    List<SolrInputDocument> children2 = Arrays.asList(sdoc("id", id(), child, "b"), sdoc("id", id(), child, "c"));
+
+    SolrInputDocument document2 = sdoc("id", id2, parent, "A",
+        "children", children2);
+
+    indexSolrInputDocumentsDirectly(document1, document2);
+
+    final SolrIndexSearcher searcher = getSearcher();
+    assertJQ(req("q","*:*",
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        "/response/numFound==" + "XyzAbc".length());
+    assertJQ(req("q",parent+":" + document2.getFieldValue(parent),
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        "/response/docs/[0]/id=='" + document2.getFieldValue("id") + "'");
+    assertQ(req("q",child+":(y z b c)", "sort","_docid_ asc"),
+        "//*[@numFound='" + "yzbc".length() + "']", // assert physical order of children
+        "//doc[1]/arr[@name='child_s']/str[text()='y']",
+        "//doc[2]/arr[@name='child_s']/str[text()='z']",
+        "//doc[3]/arr[@name='child_s']/str[text()='b']",
+        "//doc[4]/arr[@name='child_s']/str[text()='c']");
+    assertSingleParentOf(searcher, one("bc"), "A");
+    assertSingleParentOf(searcher, one("yz"), "X");
+  }
+
+  @Test
+  public void testSolrNestedFieldsSingleVal() throws Exception {
+    SolrInputDocument document1 = sdoc("id", id(), parent, "X",
+        "child1_s", sdoc("id", id(), "child_s", "y"),
+        "child2_s", sdoc("id", id(), "child_s", "z"));
+
+    SolrInputDocument document2 = sdoc("id", id(), parent, "A",
+        "child1_s", sdoc("id", id(), "child_s", "b"),
+        "child2_s", sdoc("id", id(), "child_s", "c"));
+
+    indexSolrInputDocumentsDirectly(document1, document2);
+
+    final SolrIndexSearcher searcher = getSearcher();
+    assertJQ(req("q","*:*",
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        "/response/numFound==" + "XyzAbc".length());
+    assertJQ(req("q",parent+":" + document2.getFieldValue(parent),
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        "/response/docs/[0]/id=='" + document2.getFieldValue("id") + "'");
+    assertQ(req("q",child+":(y z b c)", "sort","_docid_ asc"),
+        "//*[@numFound='" + "yzbc".length() + "']", // assert physical order of children
+        "//doc[1]/arr[@name='child_s']/str[text()='y']",
+        "//doc[2]/arr[@name='child_s']/str[text()='z']",
+        "//doc[3]/arr[@name='child_s']/str[text()='b']",
+        "//doc[4]/arr[@name='child_s']/str[text()='c']");
+    assertSingleParentOf(searcher, one("bc"), "A");
+    assertSingleParentOf(searcher, one("yz"), "X");
+  }
+
   @SuppressWarnings("serial")
   @Test
-  public void testSolrJXML() throws IOException {
+  public void testSolrJXML() throws Exception {
     UpdateRequest req = new UpdateRequest();
     
     List<SolrInputDocument> docs = new ArrayList<>();
@@ -323,7 +420,12 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
     requestWriter.write(req, os);
     assertBlockU(os.toString());
     assertU(commit());
-    
+
+    assertJQ(req("q","*:*",
+        "fl","*",
+        "sort","id asc",
+        "wt","json"),
+        "/response/numFound==" + 6);
     final SolrIndexSearcher searcher = getSearcher();
     assertSingleParentOf(searcher, one("yz"), "X");
     assertSingleParentOf(searcher, one("bc"), "A");
@@ -398,7 +500,37 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
     assertSingleParentOf(searcher, one("bc"), "A");
        
   }
-  
+
+  @Test
+  public void testJavaBinCodecNestedRelation() throws IOException {
+    SolrInputDocument topDocument = new SolrInputDocument();
+    topDocument.addField("parent_f1", "v1");
+    topDocument.addField("parent_f2", "v2");
+
+    int childsNum = atLeast(10);
+    Map<String, SolrInputDocument> children = new HashMap<>(childsNum);
+    for(int i = 0; i < childsNum; ++i) {
+      SolrInputDocument child = new SolrInputDocument();
+      child.addField("key", (i + 5) * atLeast(4));
+      String childKey = String.format(Locale.ROOT, "child%d", i);
+      topDocument.addField(childKey, child);
+      children.put(childKey, child);
+    }
+
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    try (JavaBinCodec jbc = new JavaBinCodec()) {
+      jbc.marshal(topDocument, os);
+    }
+    byte[] buffer = os.toByteArray();
+    //now read the Object back
+    SolrInputDocument result;
+    try (JavaBinCodec jbc = new JavaBinCodec(); InputStream is = new ByteArrayInputStream(buffer)) {
+      result = (SolrInputDocument) jbc.unmarshal(is);
+    }
+
+    assertTrue(compareSolrInputDocument(topDocument, result));
+  }
+
   
   @Test
   public void testJavaBinCodec() throws IOException { //actually this test must be in other test class
@@ -521,6 +653,23 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
       attachField(document, type, String.valueOf(typeValue));
     }
   }
+
+  private void indexSolrInputDocumentsDirectly(SolrInputDocument ... docs) throws IOException {
+    SolrQueryRequest coreReq = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams());
+    AddUpdateCommand updateCmd = new AddUpdateCommand(coreReq);
+    for (SolrInputDocument doc: docs) {
+      long version = getNewClock();
+      updateCmd.setVersion(Math.abs(version));
+      updateCmd.solrDoc = doc;
+      h.getCore().getUpdateHandler().addDoc(updateCmd);
+      updateCmd.clear();
+    }
+    assertU(commit());
+  }
+
+  private long getNewClock() {
+    return h.getCore().getUpdateHandler().getUpdateLog().getVersionInfo().getNewClock();
+  }
   
   /**
    * Merges two documents like

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/core/src/test/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactoryTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactoryTest.java
index da70fc6..1674a7d 100644
--- a/solr/core/src/test/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactoryTest.java
+++ b/solr/core/src/test/org/apache/solr/update/processor/IgnoreLargeDocumentProcessorFactoryTest.java
@@ -19,6 +19,7 @@ package org.apache.solr.update.processor;
 
 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -31,7 +32,7 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.update.AddUpdateCommand;
 import org.junit.Test;
 
-import static org.apache.solr.update.processor.IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.fastEstimate;
+import static org.apache.solr.update.processor.IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate;
 
 public class IgnoreLargeDocumentProcessorFactoryTest extends LuceneTestCase {
 
@@ -62,7 +63,7 @@ public class IgnoreLargeDocumentProcessorFactoryTest extends LuceneTestCase {
   public AddUpdateCommand getUpdate(int size) {
     SolrInputDocument document = new SolrInputDocument();
     document.addField(new String(new byte[size], Charset.defaultCharset()), 1L);
-    assertTrue(fastEstimate(document) > size);
+    assertTrue(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document) > size);
 
     AddUpdateCommand cmd = new AddUpdateCommand(null);
     cmd.solrDoc = document;
@@ -71,12 +72,12 @@ public class IgnoreLargeDocumentProcessorFactoryTest extends LuceneTestCase {
 
   @Test
   public void testEstimateObjectSize() {
-    assertEquals(fastEstimate("abc"), 6);
-    assertEquals(fastEstimate("abcdefgh"), 16);
+    assertEquals(estimate("abc"), 6);
+    assertEquals(estimate("abcdefgh"), 16);
     List<String> keys = Arrays.asList("int", "long", "double", "float", "str");
-    assertEquals(fastEstimate(keys), 42);
+    assertEquals(estimate(keys), 42);
     List<Object> values = Arrays.asList(12, 5L, 12.0, 5.0, "duck");
-    assertEquals(fastEstimate(values), 8);
+    assertEquals(estimate(values), 8);
 
     Map<String, Object> map = new HashMap<>();
     map.put("int", 12);
@@ -84,19 +85,97 @@ public class IgnoreLargeDocumentProcessorFactoryTest extends LuceneTestCase {
     map.put("double", 12.0);
     map.put("float", 5.0f);
     map.put("str", "duck");
-    assertEquals(fastEstimate(map), 50);
+    assertEquals(estimate(map), 50);
 
     SolrInputDocument document = new SolrInputDocument();
     for (Map.Entry<String, Object> entry : map.entrySet()) {
       document.addField(entry.getKey(), entry.getValue());
     }
-    assertEquals(fastEstimate(document), fastEstimate(map));
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(map));
 
     SolrInputDocument childDocument = new SolrInputDocument();
     for (Map.Entry<String, Object> entry : map.entrySet()) {
       childDocument.addField(entry.getKey(), entry.getValue());
     }
     document.addChildDocument(childDocument);
-    assertEquals(fastEstimate(document), fastEstimate(map) * 2);
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(map) * 2);
+  }
+
+  @Test
+  public void testEstimateObjectSizeWithSingleChild() {
+    assertEquals(estimate("abc"), 6);
+    assertEquals(estimate("abcdefgh"), 16);
+    List<String> keys = Arrays.asList("int", "long", "double", "float", "str");
+    assertEquals(estimate(keys), 42);
+    List<Object> values = Arrays.asList(12, 5L, 12.0, 5.0, "duck");
+    assertEquals(estimate(values), 8);
+    final String childDocKey = "testChildDoc";
+
+    Map<String, Object> mapWChild = new HashMap<>();
+    mapWChild.put("int", 12);
+    mapWChild.put("long", 5L);
+    mapWChild.put("double", 12.0);
+    mapWChild.put("float", 5.0f);
+    mapWChild.put("str", "duck");
+    assertEquals(estimate(mapWChild), 50);
+    Map<String, Object> childMap = new HashMap<>(mapWChild);
+
+
+    SolrInputDocument document = new SolrInputDocument();
+    for (Map.Entry<String, Object> entry : mapWChild.entrySet()) {
+      document.addField(entry.getKey(), entry.getValue());
+    }
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(mapWChild));
+
+    SolrInputDocument childDocument = new SolrInputDocument();
+    for (Map.Entry<String, Object> entry : mapWChild.entrySet()) {
+      childDocument.addField(entry.getKey(), entry.getValue());
+    }
+    document.addField(childDocKey, childDocument);
+    mapWChild.put(childDocKey, childMap);
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(childMap) * 2 + estimate(childDocKey));
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(mapWChild));
+  }
+
+  @Test
+  public void testEstimateObjectSizeWithChildList() {
+    assertEquals(estimate("abc"), 6);
+    assertEquals(estimate("abcdefgh"), 16);
+    List<String> keys = Arrays.asList("int", "long", "double", "float", "str");
+    assertEquals(estimate(keys), 42);
+    List<Object> values = Arrays.asList(12, 5L, 12.0, 5.0, "duck");
+    assertEquals(estimate(values), 8);
+    final String childDocKey = "testChildDoc";
+
+    Map<String, Object> mapWChild = new HashMap<>();
+    mapWChild.put("int", 12);
+    mapWChild.put("long", 5L);
+    mapWChild.put("double", 12.0);
+    mapWChild.put("float", 5.0f);
+    mapWChild.put("str", "duck");
+    assertEquals(estimate(mapWChild), 50);
+    Map<String, Object> childMap = new HashMap<>(mapWChild);
+
+
+    SolrInputDocument document = new SolrInputDocument();
+    for (Map.Entry<String, Object> entry : mapWChild.entrySet()) {
+      document.addField(entry.getKey(), entry.getValue());
+    }
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(mapWChild));
+
+    SolrInputDocument childDocument = new SolrInputDocument();
+    for (Map.Entry<String, Object> entry : mapWChild.entrySet()) {
+      childDocument.addField(entry.getKey(), entry.getValue());
+    }
+    List<SolrInputDocument> childList = new ArrayList<SolrInputDocument>(){
+      {
+        add(childDocument);
+        add(new SolrInputDocument(childDocument));
+      }
+    };
+    document.addField(childDocKey, childList);
+    mapWChild.put(childDocKey, childList);
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(mapWChild));
+    assertEquals(IgnoreLargeDocumentProcessorFactory.ObjectSizeEstimator.estimate(document), estimate(childMap) * (childList.size() + 1) + estimate(childDocKey));
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
index 2e69b82..43a7983 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
@@ -388,7 +388,6 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> impleme
      }
    }
 
-   /** Returns the list of child documents, or null if none. */
    @Override
    public List<SolrDocument> getChildDocuments() {
      return _childDocuments;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java
index 7891f10..4429fbe 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java
@@ -54,10 +54,20 @@ public abstract class SolrDocumentBase<T, K> implements Map<String, T>, Serializ
 
   public abstract void addChildDocuments(Collection<K> children);
 
+  /**
+   * Returns the list of <em>anonymous</em> child documents, or null if none.
+   * There may be other "labelled" child documents found in field values, in which the field name is the label.
+   * This may be deprecated in 8.0.
+   */
   public abstract List<K> getChildDocuments();
 
+  /** Has <em>anonymous</em> children? */
   public abstract boolean hasChildDocuments();
 
+  /**
+   * The <em>anonymous</em> child document count.
+   */
+  @Deprecated
   public abstract int getChildDocumentCount();
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
index c8451aa..172606f 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
@@ -25,7 +25,7 @@ import java.util.Map;
 import java.util.Set;
 
 /**
- * Represent the field and boost information needed to construct and index
+ * Represent the field-value information needed to construct and index
  * a Lucene Document.  Like the SolrDocument, the field values should
  * match those specified in schema.xml 
  *
@@ -50,7 +50,7 @@ public class SolrInputDocument extends SolrDocumentBase<SolrInputField, SolrInpu
   }
   
   /**
-   * Remove all fields and boosts from the document
+   * Remove all fields from the document
    */
   @Override
   public void clear()
@@ -66,7 +66,7 @@ public class SolrInputDocument extends SolrDocumentBase<SolrInputField, SolrInpu
   ///////////////////////////////////////////////////////////////////
 
   /** 
-   * Add a field with implied null value for boost.
+   * Add a field value to any existing values that may or may not exist.
    * 
    * The class type of value and the name parameter should match schema.xml. 
    * schema.xml can be found in conf directory under the solr home by default.
@@ -124,7 +124,7 @@ public class SolrInputDocument extends SolrDocumentBase<SolrInputField, SolrInpu
     return _fields.keySet();
   }
   
-  /** Set a field with implied null value for boost.
+  /** Set a field value; replacing the existing value if present.
    * 
    * @param name name of the field to set
    * @param value value of the field

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/77441629/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
index 9fec7e6..01e2cae 100644
--- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
@@ -45,6 +45,7 @@ import java.security.SecureRandom;
 import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
@@ -2181,16 +2182,38 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
     Iterator<String> iter1 = sdoc1.getFieldNames().iterator();
     Iterator<String> iter2 = sdoc2.getFieldNames().iterator();
 
-    if(iter1.hasNext()) {
+    while (iter1.hasNext()) {
       String key1 = iter1.next();
       String key2 = iter2.next();
 
       Object val1 = sdoc1.getFieldValues(key1);
       Object val2 = sdoc2.getFieldValues(key2);
 
-      if(!key1.equals(key2) || !val1.equals(val2)) {
+      if(!key1.equals(key2)) {
         return false;
       }
+
+      if(!(sdoc1.get(key1).getFirstValue() instanceof SolrInputDocument)) {
+        if(!val1.equals(val2)) {
+          return false;
+        }
+      } else {
+        if (!(sdoc2.get(key2).getFirstValue() instanceof SolrInputDocument)) {
+          return false;
+        }
+        Collection col1 = (Collection) val1;
+        Collection col2 = (Collection) val2;
+        if (col1.size() != col2.size()) {
+          return false;
+        }
+        Iterator<SolrInputDocument> colIter1 = col1.iterator();
+        Iterator<SolrInputDocument> colIter2 = col2.iterator();
+        while (colIter1.hasNext()) {
+          if (!compareSolrInputDocument(colIter1.next(), colIter2.next())) {
+            return false;
+          }
+        }
+      }
     }
     if(sdoc1.getChildDocuments() == null && sdoc2.getChildDocuments() == null) {
       return true;


Mime
View raw message