lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject lucene-solr:branch_7x: LUCENE-7891: use a non-buggy LRU cache in Lucene's taxonomy facets, by default
Date Tue, 05 Sep 2017 14:14:41 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 331fa4772 -> 7fabb47dc


LUCENE-7891: use a non-buggy LRU cache in Lucene's taxonomy facets, by default


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7fabb47d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7fabb47d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7fabb47d

Branch: refs/heads/branch_7x
Commit: 7fabb47dc357d559ff391a1f6b60a1298d1e095c
Parents: 331fa47
Author: Mike McCandless <mikemccand@apache.org>
Authored: Tue Sep 5 10:13:58 2017 -0400
Committer: Mike McCandless <mikemccand@apache.org>
Committed: Tue Sep 5 10:14:26 2017 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 ++
 .../writercache/LruTaxonomyWriterCache.java     | 20 ++++----
 .../writercache/TestLruTaxonomyWriterCache.java | 50 ++++++++++++++++++++
 3 files changed, 65 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fabb47d/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d2f57ae..70d2d16 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -42,6 +42,9 @@ Bug Fixes
   not recommended, lucene-analyzers-icu contains binary data structures
   specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)
 
+* LUCENE-7891: Lucene's taxonomy facets now uses a non-buggy LRU cache
+  by default.  (Jan-Willem van den Broek via Mike McCandless)
+
 Build
 
 * SOLR-11181: Switch order of maven artifact publishing procedure: deploy first

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fabb47d/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/LruTaxonomyWriterCache.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/LruTaxonomyWriterCache.java
b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/LruTaxonomyWriterCache.java
index 828e2b6..6dc8cd2 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/LruTaxonomyWriterCache.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/LruTaxonomyWriterCache.java
@@ -32,8 +32,12 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
    * function, LRU_STRING should be used.
    */
   public enum LRUType {
-    /** Use the label's hash as the key; this can lead to
-     *  silent conflicts! */
+    /** Use only the label's 64 bit longHashCode as the hash key. Do not
+     *  check equals, unlike most hash maps.
+     *  Note that while these hashes are very likely to be unique, the chance
+     *  of a collision is still greater than zero. If such an unlikely event
+     *  occurs, your document will get an incorrect facet.
+     */
     LRU_HASHED,
 
     /** Use the label as the hash key; this is always
@@ -43,15 +47,15 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
 
   private NameIntCacheLRU cache;
 
-  /** Creates this with {@link LRUType#LRU_HASHED} method. */
+  /** Creates this with {@link LRUType#LRU_STRING} method. */
   public LruTaxonomyWriterCache(int cacheSize) {
     // TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
     // For guaranteed correctness - not relying on no-collisions in the hash
     // function, NameIntCacheLRU should be used:
     // On the other hand, NameHashIntCacheLRU takes less RAM but if there
-    // are collisions (which we never found) two different paths would be
-    // mapped to the same ordinal...
-    this(cacheSize, LRUType.LRU_HASHED);
+    // are collisions two different paths would be mapped to the same
+    // ordinal...
+    this(cacheSize, LRUType.LRU_STRING);
   }
 
   /** Creates this with the specified method. */
@@ -60,8 +64,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
     // For guaranteed correctness - not relying on no-collisions in the hash
     // function, NameIntCacheLRU should be used:
     // On the other hand, NameHashIntCacheLRU takes less RAM but if there
-    // are collisions (which we never found) two different paths would be
-    // mapped to the same ordinal...
+    // are collisions two different paths would be mapped to the same
+    // ordinal...
     if (lruType == LRUType.LRU_HASHED) {
       this.cache = new NameHashIntCacheLRU(cacheSize);
     } else {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fabb47d/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestLruTaxonomyWriterCache.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestLruTaxonomyWriterCache.java
b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestLruTaxonomyWriterCache.java
new file mode 100644
index 0000000..972b296
--- /dev/null
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestLruTaxonomyWriterCache.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.facet.taxonomy.writercache;
+
+import org.apache.lucene.facet.FacetTestCase;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
+import org.junit.Test;
+
+public class TestLruTaxonomyWriterCache extends FacetTestCase {
+
+  @Test
+  public void testDefaultLRUTypeIsCollisionSafe() {
+    // These labels are clearly different, but have identical longHashCodes.
+    // Note that these labels are clearly contrived. We did encounter
+    // collisions in actual production data, but we aren't allowed to publish
+    // those.
+    final FacetLabel a = new FacetLabel("\0", "\u0003\uFFE2");
+    final FacetLabel b = new FacetLabel("\1", "\0");
+    // If this fails, then the longHashCode implementation has changed. This
+    // cannot prevent collisions. (All hashes must allow for collisions.) It
+    // will however stop the rest of this test from making sense. To fix, find
+    // new colliding labels, or make a subclass of FacetLabel that produces
+    // collisions.
+    assertEquals(a.longHashCode(), b.longHashCode());
+    // Make a cache with capacity > 2 so both our labels will fit. Don't
+    // specify an LRUType, since we want to check if the default is
+    // collision-safe.
+    final LruTaxonomyWriterCache cache = new LruTaxonomyWriterCache(10);
+    cache.put(a, 0);
+    cache.put(b, 1);
+    assertEquals(cache.get(a), 0);
+    assertEquals(cache.get(b), 1);
+  }
+
+}


Mime
View raw message