lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sha...@apache.org
Subject svn commit: r1611853 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/CHANGES.txt solr/core/ solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
Date Sat, 19 Jul 2014 07:11:06 GMT
Author: shalin
Date: Sat Jul 19 07:11:05 2014
New Revision: 1611853

URL: http://svn.apache.org/r1611853
Log:
SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside
DocumentBuilder.toDocument for use-cases with large number of fields and copyFields

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java

Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1611853&r1=1611852&r2=1611853&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Sat Jul 19 07:11:05 2014
@@ -135,6 +135,10 @@ Optimizations
   indexes with many fields of same type just use one TokenStream per thread.
   (Shay Banon, Uwe Schindler, Robert Muir)
 
+* SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside
+  DocumentBuilder.toDocument for use-cases with large number of fields and copyFields.
+  (Steven Bower via shalin)
+
 Other Changes
 ---------------------
 

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java?rev=1611853&r1=1611852&r2=1611853&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java Sat Jul 19 07:11:05 2014
@@ -18,6 +18,7 @@
 package org.apache.solr.update;
 
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -29,6 +30,8 @@ import org.apache.solr.schema.CopyField;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 
+import com.google.common.collect.Sets;
+
 /**
  *
  */
@@ -77,6 +80,7 @@ public class DocumentBuilder {
   { 
     Document out = new Document();
     final float docBoost = doc.getDocumentBoost();
+    Set<String> usedFields = Sets.newHashSet();
     
     // Load fields from SolrDocument to Document
     for( SolrInputField field : doc ) {
@@ -105,6 +109,9 @@ public class DocumentBuilder {
       // it ourselves 
       float compoundBoost = fieldBoost * docBoost;
 
+      List<CopyField> copyFields = schema.getCopyFieldsList(name);
+      if( copyFields.size() == 0 ) copyFields = null;
+
       // load each field value
       boolean hasField = false;
       try {
@@ -116,48 +123,52 @@ public class DocumentBuilder {
           if (sfield != null) {
             used = true;
             addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
+            // record the field as having a value
+            usedFields.add(sfield.getName());
           }
   
           // Check if we should copy this field value to any other fields.
           // This could happen whether it is explicit or not.
-          List<CopyField> copyFields = schema.getCopyFieldsList(name);
-          for (CopyField cf : copyFields) {
-            SchemaField destinationField = cf.getDestination();
-
-            final boolean destHasValues = 
-              (null != out.getField(destinationField.getName()));
-
-            // check if the copy field is a multivalued or not
-            if (!destinationField.multiValued() && destHasValues) {
-              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-                      "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
-                              destinationField.getName() + ": " + v);
-            }
+          if( copyFields != null ){
+            for (CopyField cf : copyFields) {
+              SchemaField destinationField = cf.getDestination();
   
-            used = true;
-            
-            // Perhaps trim the length of a copy field
-            Object val = v;
-            if( val instanceof String && cf.getMaxChars() > 0 ) {
-              val = cf.getLimitedValue((String)val);
+              final boolean destHasValues = usedFields.contains(destinationField.getName());
+  
+              // check if the copy field is a multivalued or not
+              if (!destinationField.multiValued() && destHasValues) {
+                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                        "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
+                                destinationField.getName() + ": " + v);
+              }
+    
+              used = true;
+              
+              // Perhaps trim the length of a copy field
+              Object val = v;
+              if( val instanceof String && cf.getMaxChars() > 0 ) {
+                val = cf.getLimitedValue((String)val);
+              }
+  
+              // we can't copy any boost unless the dest field is 
+              // indexed & !omitNorms, but which boost we copy depends
+              // on whether the dest field already contains values (we
+              // don't want to apply the compounded docBoost more then once)
+              final float destBoost = 
+                (destinationField.indexed() && !destinationField.omitNorms()) ?
+                (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
+              
+              addField(out, destinationField, val, destBoost);
+              // record the field as having a value
+              usedFields.add(destinationField.getName());
             }
-
-            // we can't copy any boost unless the dest field is 
-            // indexed & !omitNorms, but which boost we copy depends
-            // on whether the dest field already contains values (we
-            // don't want to apply the compounded docBoost more then once)
-            final float destBoost = 
-              (destinationField.indexed() && !destinationField.omitNorms()) ?
-              (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
             
-            addField(out, destinationField, val, destBoost);
+            // The final boost for a given field named is the product of the 
+            // *all* boosts on values of that field. 
+            // For multi-valued fields, we only want to set the boost on the
+            // first field.
+            fieldBoost = compoundBoost = 1.0f;
           }
-          
-          // The final boost for a given field named is the product of the 
-          // *all* boosts on values of that field. 
-          // For multi-valued fields, we only want to set the boost on the
-          // first field.
-          fieldBoost = compoundBoost = 1.0f;
         }
       }
       catch( SolrException ex ) {



Mime
View raw message