lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject lucene-solr:branch_7x: LUCENE-7972: DirectoryTaxonomyReader now implements Accountable
Date Wed, 04 Oct 2017 14:54:05 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 282ed910a -> db95888ef


LUCENE-7972: DirectoryTaxonomyReader now implements Accountable


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/db95888e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/db95888e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/db95888e

Branch: refs/heads/branch_7x
Commit: db95888effb14b5600106e91d21d3adb090fbd96
Parents: 282ed91
Author: Mike McCandless <mikemccand@apache.org>
Authored: Wed Oct 4 10:52:53 2017 -0400
Committer: Mike McCandless <mikemccand@apache.org>
Committed: Wed Oct 4 10:53:33 2017 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  6 +-
 .../directory/DirectoryTaxonomyReader.java      | 58 +++++++++++++++++++-
 .../taxonomy/directory/TaxonomyIndexArrays.java | 40 ++++++++++++--
 .../directory/TestDirectoryTaxonomyReader.java  | 34 +++++++++++-
 4 files changed, 129 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/db95888e/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3c7e936..823150c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -29,7 +29,11 @@ New Features
   K-nearest-neighbor search implementation.  (Steve Rowe)
 
 * LUCENE-7975: Change the default taxonomy facets cache to a faster
-  byte[] (UTF-8) based cache.
+  byte[] (UTF-8) based cache.  (Mike McCandless)
+
+* LUCENE-7972: DirectoryTaxonomyReader, in Lucene's facet module, now
+  implements Accountable, so you can more easily track how much heap
+  it's using.  (Mike McCandless)
 
 * LUCENE-7982: A new NormsFieldExistsQuery matches documents that have
   norms in a specified field (Colin Goodheart-Smithe via Mike McCandless)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/db95888e/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
index c72e60b..cde56e1 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@@ -17,6 +17,10 @@
 package org.apache.lucene.facet.taxonomy.directory;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -30,12 +34,17 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.CorruptIndexException; // javadocs
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A {@link TaxonomyReader} which retrieves stored taxonomy information from a
@@ -49,11 +58,14 @@ import org.apache.lucene.util.IOUtils;
  * 
  * @lucene.experimental
  */
-public class DirectoryTaxonomyReader extends TaxonomyReader {
+public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountable {
 
   private static final Logger logger = Logger.getLogger(DirectoryTaxonomyReader.class.getName());
 
   private static final int DEFAULT_CACHE_VALUE = 4000;
+
+  // NOTE: very coarse estimate!
+  private static final int BYTES_PER_CACHE_ENTRY = 4 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 4 * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 8 * RamUsageEstimator.NUM_BYTES_CHAR;
   
   private final DirectoryTaxonomyWriter taxoWriter;
   private final long taxoEpoch; // used in doOpenIfChanged 
@@ -325,7 +337,51 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
     ensureOpen();
     return indexReader.numDocs();
   }
+
+  @Override
+  public synchronized long ramBytesUsed() {
+    ensureOpen();
+    long ramBytesUsed = 0;
+    for (LeafReaderContext ctx : indexReader.leaves()) {
+      ramBytesUsed += ((SegmentReader) ctx.reader()).ramBytesUsed();
+    }
+    if (taxoArrays != null) {
+      ramBytesUsed += taxoArrays.ramBytesUsed();
+    }
+    synchronized (categoryCache) {
+      ramBytesUsed += BYTES_PER_CACHE_ENTRY * categoryCache.size();
+    }    
+
+    synchronized (ordinalCache) {
+      ramBytesUsed += BYTES_PER_CACHE_ENTRY * ordinalCache.size();
+    }    
+
+    return ramBytesUsed;
+  }
   
+  @Override
+  public synchronized Collection<Accountable> getChildResources() {
+    final List<Accountable> resources = new ArrayList<>();
+    long ramBytesUsed = 0;
+    for (LeafReaderContext ctx : indexReader.leaves()) {
+      ramBytesUsed += ((SegmentReader) ctx.reader()).ramBytesUsed();
+    }
+    resources.add(Accountables.namedAccountable("indexReader", ramBytesUsed));
+    if (taxoArrays != null) {
+      resources.add(Accountables.namedAccountable("taxoArrays", taxoArrays));
+    }
+
+    synchronized (categoryCache) {
+      resources.add(Accountables.namedAccountable("categoryCache", BYTES_PER_CACHE_ENTRY * categoryCache.size()));
+    }    
+
+    synchronized (ordinalCache) {
+      resources.add(Accountables.namedAccountable("ordinalCache", BYTES_PER_CACHE_ENTRY * ordinalCache.size()));
+    }    
+    
+    return Collections.unmodifiableList(resources);
+  }
+
   /**
    * setCacheSize controls the maximum allowed size of each of the caches
    * used by {@link #getPath(int)} and {@link #getOrdinal(FacetLabel)}.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/db95888e/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java
index bdd5c80..dc4d18a 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java
@@ -16,16 +16,23 @@
  */
 package org.apache.lucene.facet.taxonomy.directory;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
 import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.ArrayUtil;
-
-import java.io.IOException;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A {@link ParallelTaxonomyArrays} that are initialized from the taxonomy
@@ -33,7 +40,7 @@ import java.io.IOException;
  * 
  * @lucene.experimental
  */
-class TaxonomyIndexArrays extends ParallelTaxonomyArrays {
+class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable {
 
   private final int[] parents;
 
@@ -214,4 +221,29 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays {
     return siblings;
   }
 
+  @Override
+  public synchronized long ramBytesUsed() {
+    long ramBytesUsed = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_BOOLEAN;
+    ramBytesUsed += RamUsageEstimator.shallowSizeOf(parents);
+    if (children != null) {
+      ramBytesUsed += RamUsageEstimator.shallowSizeOf(children);
+    }
+    if (siblings != null) {
+      ramBytesUsed += RamUsageEstimator.shallowSizeOf(siblings);
+    }
+    return ramBytesUsed;
+  }
+
+  @Override
+  public synchronized Collection<Accountable> getChildResources() {
+    final List<Accountable> resources = new ArrayList<>();
+    resources.add(Accountables.namedAccountable("parents", RamUsageEstimator.shallowSizeOf(parents)));
+    if (children != null) {
+      resources.add(Accountables.namedAccountable("children", RamUsageEstimator.shallowSizeOf(children)));
+    }
+    if (siblings != null) {
+      resources.add(Accountables.namedAccountable("siblings", RamUsageEstimator.shallowSizeOf(siblings)));
+    }
+    return Collections.unmodifiableList(resources);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/db95888e/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
index 1982048..de20230 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
@@ -25,12 +25,12 @@ import java.util.Set;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.taxonomy.FacetLabel;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -529,5 +529,33 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
     
     dir.close();
   }
-  
+
+  public void testAccountable() throws Exception {
+    Directory dir = newDirectory();
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
+    int numCategories = atLeast(10);
+    int numA = 0, numB = 0;
+    Random random = random();
+    // add the two categories for which we'll also add children (so asserts are simpler)
+    taxoWriter.addCategory(new FacetLabel("a"));
+    taxoWriter.addCategory(new FacetLabel("b"));
+    for (int i = 0; i < numCategories; i++) {
+      if (random.nextBoolean()) {
+        taxoWriter.addCategory(new FacetLabel("a", Integer.toString(i)));
+        ++numA;
+      } else {
+        taxoWriter.addCategory(new FacetLabel("b", Integer.toString(i)));
+        ++numB;
+      }
+    }
+    // add category with no children
+    taxoWriter.addCategory(new FacetLabel("c"));
+    taxoWriter.close();
+    
+    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+    assertTrue(taxoReader.ramBytesUsed() > 0);
+    assertTrue(taxoReader.getChildResources().size() > 0);
+    taxoReader.close();
+    dir.close();
+  }
 }


Mime
View raw message