lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From munendr...@apache.org
Subject [lucene-solr] branch branch_8x updated: SOLR-12368: inplace update for field that doesn't yet exist in any doc
Date Wed, 17 Jul 2019 16:28:27 GMT
This is an automated email from the ASF dual-hosted git repository.

munendrasn pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 4c11633  SOLR-12368: inplace update for field that doesn't yet exist in any doc
4c11633 is described below

commit 4c11633c03d302590a95a30af36b743a22fc5340
Author: Munendra S N <munendrasn@apache.org>
AuthorDate: Wed Jul 17 21:25:57 2019 +0530

    SOLR-12368: inplace update for field that doesn't yet exist in any doc
    
    If the field is non-stored, non-indexed and docvalue enabled numeric field
    then inplace update can be done. previously, lucene didn't support
    docvalue update for field that is not yet present in indexWriter but
    LUCENE-8316 added support for this.
    This adds support to update field which satisfies inplace conditions
    but which doesn't yet exist in any docs
---
 solr/CHANGES.txt                                   |  5 +-
 .../processor/AtomicUpdateDocumentMerger.java      |  5 --
 .../collection1/conf/schema-inplace-updates.xml    |  1 +
 .../solr/update/TestInPlaceUpdatesDistrib.java     | 41 ++++++---
 .../solr/update/TestInPlaceUpdatesStandalone.java  | 97 ++++++++++++++++++++--
 5 files changed, 127 insertions(+), 22 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f1bd2e2..1097764 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -28,8 +28,11 @@ Velocity 2.0 and Velocity Tools 3.0
 Apache ZooKeeper 3.5.5
 Jetty 9.4.19.v20190610
 
+Improvements
+----------------------
 
-(No Changes)
+* SOLR-12368: Support InPlace DV updates for a field that does not yet exist in any documents
+(hossman, Simon Willnauer, Adrien Grand, Munendra S N)
 
 
 ==================  8.2.0 ==================
diff --git a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
index 4329ae9..79faf21 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
@@ -236,19 +236,14 @@ public class AtomicUpdateDocumentMerger {
     // third pass: requiring checks against the actual IndexWriter due to internal DV update
limitations
     SolrCore core = cmd.getReq().getCore();
     RefCounted<IndexWriter> holder = core.getSolrCoreState().getIndexWriter(core);
-    Set<String> fieldNamesFromIndexWriter = null;
     Set<String> segmentSortingFields = null;
     try {
       IndexWriter iw = holder.get();
-      fieldNamesFromIndexWriter = iw.getFieldNames(); // This shouldn't be needed once LUCENE-7659
is resolved
       segmentSortingFields = iw.getConfig().getIndexSortFields();
     } finally {
       holder.decref();
     }
     for (String fieldName: candidateFields) {
-      if (! fieldNamesFromIndexWriter.contains(fieldName) ) {
-        return Collections.emptySet(); // if this field doesn't exist, DV update can't work
-      }
       if (segmentSortingFields.contains(fieldName) ) {
         return Collections.emptySet(); // if this is used for segment sorting, DV updates
can't work
       }
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-inplace-updates.xml b/solr/core/src/test-files/solr/collection1/conf/schema-inplace-updates.xml
index cca2b1a..df9b237 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-inplace-updates.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-inplace-updates.xml
@@ -19,6 +19,7 @@
   
   <uniqueKey>id</uniqueKey>
   <field name="id" type="string" indexed="true" stored="true" docValues="true"/>
+  <field name="_root_" type="string" indexed="true" stored="true" docValues="true"/>
   <field name="_version_" type="long" indexed="false" stored="false"  docValues="true"
/>
   <field name="shardName" type="string" multiValued="false" indexed="false" required="false"
stored="true"/>
 
diff --git a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
index 5061805..88ef94d 100644
--- a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
+++ b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesDistrib.java
@@ -392,7 +392,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
     buildRandomIndex(101.0F, ids);
     
     List<Integer> luceneDocids = new ArrayList<>(numDocs);
-    List<Float> valuesList = new ArrayList<Float>(numDocs);
+    List<Number> valuesList = new ArrayList<>(numDocs);
     SolrParams params = params("q", "id:[0 TO *]", "fl", "*,[docid]", "rows", String.valueOf(numDocs),
"sort", "id_i asc");
     SolrDocumentList results = LEADER.query(params).getResults();
     assertEquals(numDocs, results.size());
@@ -403,7 +403,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
     log.info("Initial results: "+results);
     
     // before we do any atomic operations, sanity check our results against all clients
-    assertDocIdsAndValuesAgainstAllClients("sanitycheck", params, luceneDocids, valuesList);
+    assertDocIdsAndValuesAgainstAllClients("sanitycheck", params, luceneDocids, "inplace_updatable_float",
valuesList);
 
     // now we're going to overwrite the value for all of our testing docs
     // giving them a value between -5 and +5
@@ -430,7 +430,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
                                              "fq", "id:[0 TO *]"),
                                       // existing sort & fl that we want...
                                       params),
-       luceneDocids, valuesList);
+       luceneDocids, "inplace_updatable_float", valuesList);
       
     // update doc, w/increment
     log.info("Updating the documents...");
@@ -441,7 +441,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
       // 0 test docs matching the query inplace_updatable_float:[-10 TO 10]
       final float inc = (r.nextBoolean() ? -1.0F : 1.0F) * (r.nextFloat() + (float)atLeast(20));
       assert 20 < Math.abs(inc);
-      final float value = valuesList.get(id) + inc;
+      final float value = (float)valuesList.get(id) + inc;
       assert value < -10 || 10 < value;
         
       valuesList.set(id, value);
@@ -454,7 +454,22 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
                                              "fq", "id:[0 TO *]"),
                                       // existing sort & fl that we want...
                                       params),
-       luceneDocids, valuesList);
+       luceneDocids, "inplace_updatable_float", valuesList);
+
+    log.info("Updating the documents with new field...");
+    Collections.shuffle(ids, r);
+    for (int id : ids) {
+      final int val = random().nextInt(20);
+      valuesList.set(id, val);
+      index("id", id, "inplace_updatable_int", map((random().nextBoolean()?"inc": "set"),
val));
+    }
+    commit();
+
+    assertDocIdsAndValuesAgainstAllClients
+        ("inplace_for_first_field_update", SolrParams.wrapDefaults(params("q", "inplace_updatable_int:[*
TO *]",
+            "fq", "id:[0 TO *]"),
+            params),
+            luceneDocids, "inplace_updatable_int", valuesList);
     log.info("docValuesUpdateTest: This test passed fine...");
   }
 
@@ -534,12 +549,14 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
    * @param debug used in log and assertion messages
    * @param req the query to execut, should include rows &amp; sort params such that
the results can be compared to luceneDocids and valuesList
    * @param luceneDocids a list of "[docid]" values to be tested against each doc in the
req results (in order)
-   * @param valuesList a list of "inplace_updatable_float" values to be tested against each
doc in the req results (in order)
+   * @param fieldName used to get value from the doc to validate with valuesList
+   * @param valuesList a list of given fieldName values to be tested against each doc in
results (in order)
    */
   private void assertDocIdsAndValuesAgainstAllClients(final String debug,
                                                       final SolrParams req,
                                                       final List<Integer> luceneDocids,
-                                                      final List<Float> valuesList)
throws Exception {
+                                                      final String fieldName,
+                                                      final List<Number> valuesList)
throws Exception {
     assert luceneDocids.size() == valuesList.size();
     final long numFoundExpected = luceneDocids.size();
     
@@ -564,7 +581,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
         Thread.sleep(WAIT_TIME);          
       }
       
-      assertDocIdsAndValuesInResults(msg, results, luceneDocids, valuesList);
+      assertDocIdsAndValuesInResults(msg, results, luceneDocids, fieldName, valuesList);
     }
   }
   
@@ -575,12 +592,14 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
    * @param msgPre used as a prefix for assertion messages
    * @param results the sorted results of some query, such that all matches are included
(ie: rows = numFound)
    * @param luceneDocids a list of "[docid]" values to be tested against each doc in results
(in order)
-   * @param valuesList a list of "inplace_updatable_float" values to be tested against each
doc in results (in order)
+   * @param fieldName used to get value from the doc to validate with valuesList
+   * @param valuesList a list of given fieldName values to be tested against each doc in
results (in order)
    */
   private void assertDocIdsAndValuesInResults(final String msgPre,
                                               final SolrDocumentList results,
                                               final List<Integer> luceneDocids,
-                                              final List<Float> valuesList) {
+                                              final String fieldName,
+                                              final List<Number> valuesList) {
 
     assert luceneDocids.size() == valuesList.size();
     assertEquals(msgPre + ": rows param wasn't big enough, we need to compare all results
matching the query",
@@ -590,7 +609,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase
{
     
     for (SolrDocument doc : results) {
       final int id = Integer.parseInt(doc.get("id").toString());
-      final Object val = doc.get("inplace_updatable_float");
+      final Object val = doc.get(fieldName);
       final Object docid = doc.get("[docid]");
       assertEquals(msgPre + " wrong val for " + doc.toString(), valuesList.get(id), val);
       assertEquals(msgPre + " wrong [docid] for " + doc.toString(), luceneDocids.get(id),
docid);
diff --git a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesStandalone.java b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesStandalone.java
index 0f9d2b7..90397c1 100644
--- a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesStandalone.java
+++ b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdatesStandalone.java
@@ -290,6 +290,93 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
   }
 
   @Test
+  public void testUpdatingFieldNotPresentInDoc() throws Exception {
+    long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first"), null);
+    long version2 = addAndGetVersion(sdoc("id", "2", "title_s", "second"), null);
+    long version3 = addAndGetVersion(sdoc("id", "3", "title_s", "third"), null);
+    assertU(commit("softCommit", "false"));
+    assertQ(req("q", "*:*"), "//*[@numFound='3']");
+
+    // subsequent updates shouldn't cause docid changes
+    int docid1 = getDocId("1");
+    int docid2 = getDocId("2");
+    int docid3 = getDocId("3");
+
+    // updating fields which are not present in the document
+    // tests both set and inc with different fields
+    version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_float", map("set",
200));
+    version2 = addAndAssertVersion(version2, "id", "2", "inplace_updatable_float", map("inc",
100));
+    version3 = addAndAssertVersion(version3, "id", "3", "inplace_updatable_float", map("set",
300));
+    version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_int", map("set",
300));
+    assertU(commit("softCommit", "false"));
+
+    assertQ(req("q", "*:*", "sort", "id asc", "fl", "*,[docid]"),
+        "//*[@numFound='3']",
+        "//result/doc[1]/float[@name='inplace_updatable_float'][.='200.0']",
+        "//result/doc[1]/int[@name='inplace_updatable_int'][.='300']",
+        "//result/doc[2]/float[@name='inplace_updatable_float'][.='100.0']",
+        "//result/doc[3]/float[@name='inplace_updatable_float'][.='300.0']",
+        "//result/doc[1]/long[@name='_version_'][.='"+version1+"']",
+        "//result/doc[2]/long[@name='_version_'][.='"+version2+"']",
+        "//result/doc[3]/long[@name='_version_'][.='"+version3+"']",
+        "//result/doc[1]/int[@name='[docid]'][.='"+docid1+"']",
+        "//result/doc[2]/int[@name='[docid]'][.='"+docid2+"']",
+        "//result/doc[3]/int[@name='[docid]'][.='"+docid3+"']"
+    );
+
+    // adding new field which is not present in any docs but matches dynamic field rule
+    // and satisfies inplace condition should be treated as inplace update
+    version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_i_dvo", map("set",
200));
+    assertU(commit("softCommit", "false"));
+    assertQ(req("q", "id:1", "sort", "id asc", "fl", "*,[docid]"),
+        "//*[@numFound='1']",
+        "//result/doc[1]/float[@name='inplace_updatable_float'][.='200.0']",
+        "//result/doc[1]/int[@name='inplace_updatable_int'][.='300']",
+        "//result/doc[1]/int[@name='[docid]'][.='"+docid1+"']",
+        "//result/doc[1]/int[@name='inplace_updatable_i_dvo'][.='200']"
+        );
+
+    // delete everything
+    deleteAllAndCommit();
+
+    // test for document with child documents
+    SolrInputDocument doc = new SolrInputDocument();
+    doc.setField("id", "1");
+    doc.setField("title_s", "parent");
+
+    SolrInputDocument child1 = new SolrInputDocument();
+    child1.setField("id", "1_1");
+    child1.setField("title_s", "child1");
+    SolrInputDocument child2 = new SolrInputDocument();
+    child2.setField("id", "1_2");
+    child2.setField("title_s", "child2");
+
+    doc.addChildDocument(child1);
+    doc.addChildDocument(child2);
+    long version = addAndGetVersion(doc, null);
+    assertU(commit("softCommit", "false"));
+    assertQ(req("q", "*:*"), "//*[@numFound='3']");
+
+    int parentDocId = getDocId("1");
+    int childDocid1 = getDocId("1_1");
+    int childDocid2 = getDocId("1_2");
+    version = addAndAssertVersion(version, "id", "1", "inplace_updatable_float", map("set",
200));
+    version = addAndAssertVersion(version, "id", "1", "inplace_updatable_int", map("inc",
300));
+    assertU(commit("softCommit", "false"));
+
+    // first child docs would be returned followed by parent doc
+    assertQ(req("q", "*:*", "fl", "*,[docid]"),
+        "//*[@numFound='3']",
+        "//result/doc[3]/float[@name='inplace_updatable_float'][.='200.0']",
+        "//result/doc[3]/int[@name='inplace_updatable_int'][.='300']",
+        "//result/doc[3]/int[@name='[docid]'][.='"+parentDocId+"']",
+        "//result/doc[1]/int[@name='[docid]'][.='"+childDocid1+"']",
+        "//result/doc[2]/int[@name='[docid]'][.='"+childDocid2+"']"
+    );
+  }
+
+
+  @Test
   public void testUpdateTwoDifferentFields() throws Exception {
     long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first", "inplace_updatable_float",
42), null);
     assertU(commit("softCommit", "false"));
@@ -423,7 +510,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
    * Helper method to search for the specified (uniqueKey field) id using <code>fl=[docid]</code>

    * and return the internal lucene docid.
    */
-  private int getDocId(String id) throws NumberFormatException, Exception {
+  private int getDocId(String id) throws Exception {
     SolrDocumentList results = client.query(params("q","id:" + id, "fl", "[docid]")).getResults();
     assertEquals(1, results.getNumFound());
     assertEquals(1, results.size());
@@ -985,10 +1072,10 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
                                                "inplace_updatable_int_with_default");
     Collections.shuffle(fieldsToCheck, random()); // ... and regardless of order checked
     for (String field : fieldsToCheck) {
-      // In-place updatable field updated before it exists SHOULD NOT BE in-place updated:
+      // In-place updatable field updated before it exists SHOULD NOW BE in-place updated
(since LUCENE-8316):
       inPlaceUpdatedFields = callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_",
42L,
                                                                     field, map("set", 10)));
-      assertFalse(field, inPlaceUpdatedFields.contains(field));
+      assertTrue(field, inPlaceUpdatedFields.contains(field));
       
       // In-place updatable field updated after it exists SHOULD BE in-place updated:
       addAndGetVersion(sdoc("id", "1", field, "0"), params()); // setting up the dv
@@ -1023,7 +1110,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
                callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
                                                       "inplace_updatable_int_with_default",
"100")).isEmpty());
   
-    assertTrue("non existent dynamic dv field updated first time",
+    assertFalse("non existent dynamic dv field updated first time",
                callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
                                                       "new_updatable_int_i_dvo", map("set",
10))).isEmpty());
     
@@ -1036,7 +1123,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
                                                                   "new_updatable_int_i_dvo",
map("set", 10)));
     assertTrue(inPlaceUpdatedFields.contains("new_updatable_int_i_dvo"));
 
-    // for copy fields, regardless of wether the source & target support inplace updates,
+    // for copy fields, regardless of whether the source & target support inplace updates,
     // it won't be inplace if the DVs don't exist yet...
     assertTrue("inplace fields should be empty when doc has no copyfield src values yet",
                callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,


Mime
View raw message