lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dwe...@apache.org
Subject [1/2] lucene-solr:master: LUCENE-8380: UTF8TaxonomyWriterCache page/ offset calculation bug
Date Wed, 04 Jul 2018 07:15:46 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 2e22a12f0 -> cedeaf976
  refs/heads/master b7b6f242e -> 0f652627a


LUCENE-8380: UTF8TaxonomyWriterCache page/ offset calculation bug


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0f652627
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0f652627
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0f652627

Branch: refs/heads/master
Commit: 0f652627a06f036beba0a6a6d201004d7d5a365c
Parents: b7b6f24
Author: Dawid Weiss <dweiss@apache.org>
Authored: Wed Jul 4 09:06:33 2018 +0200
Committer: Dawid Weiss <dweiss@apache.org>
Committed: Wed Jul 4 09:06:33 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  2 ++
 .../writercache/UTF8TaxonomyWriterCache.java    | 30 +++++++++++---------
 .../TestUTF8TaxonomyWriterCache.java            | 12 ++++++--
 3 files changed, 28 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f652627/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4d2769f..2ac2832 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -129,6 +129,8 @@ API Changes:
 
 Bug Fixes:
 
+* LUCENE-8380: UTF8TaxonomyWriterCache inconsistency. (Ruslan Torobaev, Dawid Weiss)
+
 * LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
   via LUCENE-8165 since we are now checking for offset+length going out of bounds.
   (Robert Muir, Nhat Nyugen, Simon Willnauer)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f652627/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
index 1c0adf7..f27ac63 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/UTF8TaxonomyWriterCache.java
@@ -37,12 +37,14 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
         return new BytesRefBuilder();
       }
     };
+
   private final Counter bytesUsed = Counter.newCounter();
   private final BytesRefHash map = new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed)));
 
-  private final static int ORDINALS_PAGE_SIZE = 65536;
-  private final static int ORDINALS_PAGE_MASK = ORDINALS_PAGE_SIZE - 1;
-    
+  private final static int PAGE_BITS = 16;
+  private final static int PAGE_SIZE = 1 << PAGE_BITS;
+  private final static int PAGE_MASK = PAGE_SIZE - 1;
+
   private volatile int[][] ordinals;
 
   // How many labels we are storing:
@@ -54,7 +56,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
   /** Sole constructor. */
   public UTF8TaxonomyWriterCache() {
     ordinals = new int[1][];
-    ordinals[0] = new int[ORDINALS_PAGE_SIZE];
+    ordinals[0] = new int[PAGE_SIZE];
   }
 
   @Override
@@ -67,16 +69,16 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
     if (id == -1) {
       return LabelToOrdinal.INVALID_ORDINAL;
     }
-    int page = id / ORDINALS_PAGE_SIZE;
-    int offset = id % ORDINALS_PAGE_MASK;
+    int page = id >>> PAGE_BITS;
+    int offset = id & PAGE_MASK;
     return ordinals[page][offset];
   }
 
   // Called only from assert
   private boolean assertSameOrdinal(FacetLabel label, int id, int ord) {
     id = -id - 1;
-    int page = id / ORDINALS_PAGE_SIZE;
-    int offset = id % ORDINALS_PAGE_MASK;
+    int page = id >>> PAGE_BITS;
+    int offset = id & PAGE_MASK;
     int oldOrd = ordinals[page][offset];
     if (oldOrd != ord) {
       throw new IllegalArgumentException("label " + label + " was already cached, with old
ord=" + oldOrd + " versus new ord=" + ord);
@@ -95,15 +97,15 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
         return false;
       }
       assert id == count;
-      int page = id / ORDINALS_PAGE_SIZE;
-      int offset = id % ORDINALS_PAGE_MASK;
+      int page = id >>> PAGE_BITS;
+      int offset = id & PAGE_MASK;
       if (page == pageCount) {
         if (page == ordinals.length) {
           int[][] newOrdinals = new int[ArrayUtil.oversize(page+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
           System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length);
           ordinals = newOrdinals;
         }
-        ordinals[page] = new int[ORDINALS_PAGE_MASK];
+        ordinals[page] = new int[PAGE_SIZE];
         pageCount++;
       }
       ordinals[page][offset] = ord;
@@ -125,7 +127,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
     map.clear();
     map.reinit();
     ordinals = new int[1][];
-    ordinals[0] = new int[ORDINALS_PAGE_SIZE];
+    ordinals[0] = new int[PAGE_SIZE];
     count = 0;
     pageCount = 0;
     assert bytesUsed.get() == 0;
@@ -138,7 +140,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
   
   @Override
   public synchronized long ramBytesUsed() {
-    return bytesUsed.get() + pageCount * ORDINALS_PAGE_SIZE * RamUsageEstimator.NUM_BYTES_INT;
+    return bytesUsed.get() + pageCount * PAGE_SIZE * Integer.BYTES;
   }
     
   @Override
@@ -146,7 +148,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache,
Accou
   }
 
   private static final byte DELIM_CHAR = (byte) 0x1F;
-    
+
   private BytesRef toBytes(FacetLabel label) {
     BytesRefBuilder bytes = this.bytes.get();
     bytes.clear();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f652627/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestUTF8TaxonomyWriterCache.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestUTF8TaxonomyWriterCache.java
b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestUTF8TaxonomyWriterCache.java
index c36abe1..4674824 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestUTF8TaxonomyWriterCache.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestUTF8TaxonomyWriterCache.java
@@ -28,6 +28,16 @@ import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.util.TestUtil;
 
 public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
+  public void testPageOverflow() throws Exception {
+    UTF8TaxonomyWriterCache cache = new UTF8TaxonomyWriterCache();
+    for (int ord = 0; ord < 65536 * 2; ord++) {
+      cache.put(new FacetLabel("foo:", Integer.toString(ord)), ord);
+    }
+
+    for (int ord = 0; ord < 65536 * 2; ord++) {
+      assertEquals(ord, cache.get(new FacetLabel("foo:", Integer.toString(ord))));
+    }
+  }
 
   public void testRandom() throws Exception {
     LabelToOrdinal map = new LabelToOrdinalMap();
@@ -37,8 +47,6 @@ public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
     final int n = atLeast(10 * 1000);
     final int numUniqueValues = 50 * 1000;
 
-    byte[] buffer = new byte[50];
-
     Random random = random();
     Set<String> uniqueValuesSet = new HashSet<>();
     while (uniqueValuesSet.size() < numUniqueValues) {


Mime
View raw message