cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jbel...@apache.org
Subject svn commit: r1224679 - in /cassandra/trunk: CHANGES.txt src/java/org/apache/cassandra/io/sstable/SSTable.java src/java/org/apache/cassandra/io/sstable/SSTableReader.java src/java/org/apache/cassandra/utils/EstimatedHistogram.java
Date Mon, 26 Dec 2011 05:21:06 GMT
Author: jbellis
Date: Mon Dec 26 05:21:06 2011
New Revision: 1224679

URL: http://svn.apache.org/viewvc?rev=1224679&view=rev
Log:
Optimize key count estimation when opening sstable on startup
patch by Melvin Wang and jbellis; reviewed by slebresne for CASSANDRA-2988

Modified:
    cassandra/trunk/CHANGES.txt
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
    cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java

Modified: cassandra/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/CHANGES.txt (original)
+++ cassandra/trunk/CHANGES.txt Mon Dec 26 05:21:06 2011
@@ -1,4 +1,6 @@
 1.1-dev
+ * Optimize key count estimation when opening sstable on startup
+   (CASSANDRA-2988)
  * multi-dc replication optimization supporting CL > ONE (CASSANDRA-3577)
  * add command to stop compactions (CASSANDRA-1740, 3566, 3582)
  * multithreaded streaming (CASSANDRA-3494)

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java Mon Dec 26 05:21:06 2011
@@ -207,25 +207,6 @@ public abstract class SSTable
         return components;
     }
 
-    /** @return An estimate of the number of keys contained in the given data file. */
-    static long estimateRowsFromData(Descriptor desc, RandomAccessReader dfile) throws IOException
-    {
-        // collect sizes for the first 1000 keys, or first 100 megabytes of data
-        final int SAMPLES_CAP = 1000, BYTES_CAP = (int)Math.min(100000000, dfile.length());
-        int keys = 0;
-        long dataPosition = 0;
-        while (dataPosition < BYTES_CAP && keys < SAMPLES_CAP)
-        {
-            dfile.seek(dataPosition);
-            ByteBufferUtil.skipShortLength(dfile);
-            long dataSize = SSTableReader.readRowSize(dfile, desc);
-            dataPosition = dfile.getFilePointer() + dataSize;
-            keys++;
-        }
-        dfile.seek(0);
-        return dfile.length() / (dataPosition / keys);
-    }
-
     /** @return An estimate of the number of keys contained in the given index file. */
     static long estimateRowsFromIndex(RandomAccessReader ifile) throws IOException
     {

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java Mon Dec 26 05:21:06 2011
@@ -335,11 +335,14 @@ public class SSTableReader extends SSTab
         try
         {
             long indexSize = input.length();
-            long estimatedKeys = SSTable.estimateRowsFromIndex(input);
+            long histogramCount = sstableMetadata.estimatedRowSize.count();
+            long estimatedKeys = histogramCount > 0 && !sstableMetadata.estimatedRowSize.isOverflowed()
+                               ? histogramCount
+                               : SSTable.estimateRowsFromIndex(input); // statistics is supposed to be optional
             indexSummary = new IndexSummary(estimatedKeys);
             if (recreatebloom)
-                // estimate key count based on index length
                 bf = LegacyBloomFilter.getFilter(estimatedKeys, 15);
+
             while (true)
             {
                 long indexPosition = input.getFilePointer();

Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java?rev=1224679&r1=1224678&r2=1224679&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/EstimatedHistogram.java Mon Dec 26 05:21:06 2011
@@ -183,6 +183,17 @@ public class EstimatedHistogram
     }
 
     /**
+     * @return the total number of non-zero values
+     */
+    public long count()
+    {
+       long sum = 0L;
+       for (int i = 0; i < buckets.length(); i++) 
+           sum += buckets.get(i);
+       return sum;
+    }
+
+    /**
      * @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added
      */
     public boolean isOverflowed()



Mime
View raw message