lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sar...@apache.org
Subject [1/2] lucene-solr:branch_6x: SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.
Date Wed, 16 Mar 2016 23:01:38 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 14752476f -> e30551db2


SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efad5807
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efad5807
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efad5807

Branch: refs/heads/branch_6x
Commit: efad5807b6b3e109201ba11c218750ee1e444185
Parents: 1475247
Author: Steve Rowe <sarowe@apache.org>
Authored: Wed Mar 16 18:56:23 2016 -0400
Committer: Steve Rowe <sarowe@apache.org>
Committed: Wed Mar 16 19:00:48 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../apache/solr/search/SolrIndexSearcher.java   |   4 +-
 .../solr/schema/TestUseDocValuesAsStored.java   | 158 +++++++++++++++----
 3 files changed, 128 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efad5807/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b5b1a04..0eac27c 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -316,6 +316,9 @@ Bug Fixes
 * SOLR-8835: JSON Facet API: fix faceting exception on multi-valued numeric fields that
   have docValues. (yonik)
 
+* SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.
+  (Ishan Chattopadhyaya, Steve Rowe)
+
 Optimizations
 ----------------------
 * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not
been

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efad5807/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 39fc8dd..4c9790a 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -816,9 +816,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,
SolrI
             if (schemaField.getType() instanceof TrieIntField) {
               newVal = val.intValue();
             } else if (schemaField.getType() instanceof TrieFloatField) {
-              newVal = NumericUtils.sortableIntToFloat(val.intValue());
+              newVal = Float.intBitsToFloat(val.intValue());
             } else if (schemaField.getType() instanceof TrieDoubleField) {
-              newVal = NumericUtils.sortableLongToDouble(val);
+              newVal = Double.longBitsToDouble(val);
             } else if (schemaField.getType() instanceof TrieDateField) {
               newVal = new Date(val);
             } else if (schemaField.getType() instanceof EnumField) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efad5807/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java b/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java
index 839121a..ac245cd 100644
--- a/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java
+++ b/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java
@@ -16,15 +16,36 @@
  */
 package org.apache.solr.schema;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathFactory;
 import java.io.File;
 
-import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.Month;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.TestUtil;
 import org.apache.solr.core.AbstractBadConfigTestBase;
+import org.apache.solr.util.DOMUtil;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
 
 /**
  * Tests the useDocValuesAsStored functionality.
@@ -38,30 +59,56 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase
{
   
   private static final String collection = "collection1";
   private static final String confDir = collection + "/conf";
-  
+
+  private static final long START_RANDOM_EPOCH_MILLIS;
+  private static final long END_RANDOM_EPOCH_MILLIS;
+  private static final String[] SEVERITY;
+
+  // http://www.w3.org/TR/2006/REC-xml-20060816/#charsets
+  private static final String NON_XML_CHARS = "\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE\uFFFF";
+  // Avoid single quotes (problematic in XPath literals) and carriage returns (XML roundtripping
fails)
+  private static final Pattern BAD_CHAR_PATTERN = Pattern.compile("[\'\r" + NON_XML_CHARS
+ "]");
+  private static final Pattern STORED_FIELD_NAME_PATTERN = Pattern.compile("_dv$");
+
+  static {
+    // Copy of DateTimeFormatter.ISO_INSTANT with fixed 3 digit milliseconds
+    START_RANDOM_EPOCH_MILLIS = LocalDateTime.of(1970, Month.JANUARY, 1, 0, 0)
+        .toInstant(ZoneOffset.UTC).toEpochMilli();
+    END_RANDOM_EPOCH_MILLIS = LocalDateTime.of(2030, Month.DECEMBER, 31, 23, 59, 59, 999_000_000)
+        .toInstant(ZoneOffset.UTC).toEpochMilli();
+    try {
+      DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+      InputStream stream = TestUseDocValuesAsStored.class.getResourceAsStream("/solr/collection1/conf/enumsConfig.xml");
+      Document doc = builder.parse(new InputSource(IOUtils.getDecodingReader(stream, StandardCharsets.UTF_8)));
+      XPath xpath = XPathFactory.newInstance().newXPath();
+      NodeList nodes = (NodeList)xpath.evaluate
+          ("/enumsConfig/enum[@name='severity']/value", doc, XPathConstants.NODESET);
+      SEVERITY = new String[nodes.getLength()];
+      for (int i = 0 ; i < nodes.getLength() ; ++i) {
+        SEVERITY[i] = DOMUtil.getText(nodes.item(i));
+      }
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   @Before
   private void initManagedSchemaCore() throws Exception {
     tmpSolrHome = createTempDir().toFile();
     tmpConfDir = new File(tmpSolrHome, confDir);
     File testHomeConfDir = new File(TEST_HOME(), confDir);
     FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-managed-schema.xml"),
tmpConfDir);
-    FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-basic.xml"), tmpConfDir);
     FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig.snippet.randomindexconfig.xml"),
tmpConfDir);
-    FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-one-field-no-dynamic-field.xml"),
tmpConfDir);
-    FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-one-field-no-dynamic-field-unique-key.xml"),
tmpConfDir);
     FileUtils.copyFileToDirectory(new File(testHomeConfDir, "enumsConfig.xml"), tmpConfDir);
     FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-non-stored-docvalues.xml"),
tmpConfDir);
-    FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-minimal.xml"), tmpConfDir);
-    FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema_codec.xml"), tmpConfDir);
-    FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-bm25.xml"), tmpConfDir);
-    
+
     // initCore will trigger an upgrade to managed schema, since the solrconfig has
     // <schemaFactory class="ManagedIndexSchemaFactory" ... />
     System.setProperty("enable.update.log", "false");
     System.setProperty("managed.schema.mutable", "true");
     initCore("solrconfig-managed-schema.xml", "schema-non-stored-docvalues.xml", tmpSolrHome.getPath());
   }
-  
+
   @After
   private void afterClass() throws Exception {
     deleteCore();
@@ -116,31 +163,68 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase
{
   }
 
   @Test
-  public void testSinglyValued() throws IOException {
-    clearIndex();
-    doTest("check string value is correct", "test_s_dvo", "str", "keyword");
-    doTest("check int value is correct", "test_i_dvo", "int", "1234");
-    doTest("check double value is correct", "test_d_dvo", "double", "1.234");
-    doTest("check long value is correct", "test_l_dvo", "long", "12345");
-    doTest("check float value is correct", "test_f_dvo", "float", "1.234");
-    doTest("check dt value is correct", "test_dt_dvo", "date", "1976-07-04T12:08:56.235Z");
-    doTest("check stored and docValues value is correct", "test_s_dv", "str", "storedAndDocValues");
-    doTest("check non-stored and non-indexed is accessible", "test_s_dvo2", "str", "gotIt");
-    doTest("enumField", "enum_dvo", "str", "Critical");
+  public void testRandomSingleAndMultiValued() throws Exception {
+    for (int c = 0 ; c < 10 * RANDOM_MULTIPLIER ; ++c) {
+      clearIndex();
+      int[] arity = new int[9];
+      for (int a = 0 ; a < arity.length ; ++a) {
+        // Single-valued 50% of the time; other 50%: 2-10 values equally likely
+        arity[a] = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 10);
+      }
+      doTest("check string value is correct", dvStringFieldName(arity[0], true, false), "str",
nextValues(arity[0], "str"));
+      doTest("check int value is correct", "test_i" + plural(arity[1]) + "_dvo", "int", nextValues(arity[1],
"int"));
+      doTest("check double value is correct", "test_d" + plural(arity[2]) + "_dvo", "double",
nextValues(arity[2], "double"));
+      doTest("check long value is correct", "test_l" + plural(arity[3]) + "_dvo", "long",
nextValues(arity[3], "long"));
+      doTest("check float value is correct", "test_f" + plural(arity[4]) + "_dvo", "float",
nextValues(arity[4], "float"));
+      doTest("check date value is correct", "test_dt" + plural(arity[5]) + "_dvo", "date",
nextValues(arity[5], "date"));
+      doTest("check stored and docValues value is correct", dvStringFieldName(arity[6], true,
true), "str", nextValues(arity[6], "str"));
+      doTest("check non-stored and non-indexed is accessible", dvStringFieldName(arity[7],
false, false), "str", nextValues(arity[7], "str"));
+      doTest("enumField", "enum" + plural(arity[8]) + "_dvo", "str", nextValues(arity[8],
"enum"));
+    }
   }
 
-  @Test
-  public void testMultiValued() throws IOException {
-    clearIndex();
-    doTest("check string value is correct", "test_ss_dvo", "str", "keyword", "keyword2");
-    doTest("check int value is correct", "test_is_dvo", "int", "1234", "12345");
-    doTest("check double value is correct", "test_ds_dvo", "double", "1.234", "12.34", "123.4");
-    doTest("check long value is correct", "test_ls_dvo", "long", "12345", "123456");
-    doTest("check float value is correct", "test_fs_dvo", "float", "1.234", "12.34");
-    doTest("check dt value is correct", "test_dts_dvo", "date", "1976-07-04T12:08:56.235Z",
"1978-07-04T12:08:56.235Z");
-    doTest("check stored and docValues value is correct", "test_ss_dv", "str", "storedAndDocValues",
"storedAndDocValues2");
-    doTest("check non-stored and non-indexed is accessible", "test_ss_dvo2", "str", "gotIt",
"gotIt2");
-    doTest("enumField", "enums_dvo", "str", "High", "Critical");
+  private String plural(int arity) {
+    return arity > 1 ? "s" : "";
+  }
+
+  private static boolean isStoredField(String fieldName) {
+    return STORED_FIELD_NAME_PATTERN.matcher(fieldName).find();
+  }
+
+  private String dvStringFieldName(int arity, boolean indexed, boolean stored) {
+    String base = "test_s" + (arity > 1 ? "s": "");
+    String suffix = "";
+    if (indexed && stored) suffix = "_dv";
+    else if (indexed && ! stored) suffix = "_dvo";
+    else if ( ! indexed && ! stored) suffix = "_dvo2";
+    else assertTrue("unsupported dv string field combination: stored and not indexed", false);
+    return base + suffix;
+  }
+
+  private String[] nextValues(int arity, String valueType) throws Exception {
+    String[] values = new String[arity];
+    for (int i = 0 ; i < arity ; ++i) {
+      switch (valueType) {
+        case "int": values[i] = String.valueOf(random().nextInt()); break;
+        case "double": values[i] = String.valueOf(Double.longBitsToDouble(random().nextLong()));
break;
+        case "long": values[i] = String.valueOf(random().nextLong()); break;
+        case "float": values[i] = String.valueOf(Float.intBitsToFloat(random().nextInt()));
break;
+        case "enum": values[i] = SEVERITY[TestUtil.nextInt(random(), 0, SEVERITY.length -
1)]; break;
+        case "str": {
+          String str = TestUtil.randomRealisticUnicodeString(random());
+          values[i] = BAD_CHAR_PATTERN.matcher(str).replaceAll("\uFFFD");
+          break;
+        }
+        case "date": {
+          long epochMillis = TestUtil.nextLong(random(), START_RANDOM_EPOCH_MILLIS, END_RANDOM_EPOCH_MILLIS);
+          LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis),
ZoneOffset.UTC);
+          values[i] = dateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + 'Z';
+          break;
+        }
+        default: throw new Exception("unknown type '" + valueType + "'");
+      }
+    }
+    return values;
   }
 
   @Test
@@ -198,6 +282,8 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase
{
     String[] xpaths = new String[value.length + 1];
 
     if (value.length > 1) {
+      Set<String> valueSet = new HashSet<>();
+      valueSet.addAll(Arrays.asList(value));
       String[] fieldAndValues = new String[value.length * 2 + 2];
       fieldAndValues[0] = "id";
       fieldAndValues[1] = id;
@@ -205,10 +291,12 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase
{
       for (int i = 0; i < value.length; ++i) {
         fieldAndValues[i * 2 + 2] = field;
         fieldAndValues[i * 2 + 3] = value[i];
-        xpaths[i] = "//arr[@name='" + field + "']/" + type + "[" + (i + 1) + "][.='" + value[i]
+ "']";
+        xpaths[i] = "//arr[@name='" + field + "']/" + type + "[.='" + value[i] + "']";
       }
 
-      xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = " + value.length
+ "]";
+      // Docvalues are sets, but stored values are ordered multisets, so cardinality depends
on the value source
+      xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = "
+          + (isStoredField(field) ? value.length : valueSet.size()) + "]";
       assertU(adoc(fieldAndValues));
 
     } else {


Mime
View raw message