cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jbel...@apache.org
Subject svn commit: r802185 - in /incubator/cassandra/trunk: conf/storage-conf.xml src/java/org/apache/cassandra/config/DatabaseDescriptor.java src/java/org/apache/cassandra/io/SSTableWriter.java test/system/stress.py
Date Fri, 07 Aug 2009 20:57:03 GMT
Author: jbellis
Date: Fri Aug  7 20:57:02 2009
New Revision: 802185

URL: http://svn.apache.org/viewvc?rev=802185&view=rev
Log:
add SSTableWriter buffer size option; increase default.
patch by jbellis; reviewed by Sammy Yu for CASSANDRA-339

Modified:
    incubator/cassandra/trunk/conf/storage-conf.xml
    incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
    incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java
    incubator/cassandra/trunk/test/system/stress.py

Modified: incubator/cassandra/trunk/conf/storage-conf.xml
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/conf/storage-conf.xml?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/conf/storage-conf.xml (original)
+++ incubator/cassandra/trunk/conf/storage-conf.xml Fri Aug  7 20:57:02 2009
@@ -190,29 +190,41 @@
     <!-- Memory, Disk, and Performance                                        -->
     <!--======================================================================-->
 
-    <!-- Add column indexes to a row after its contents reach this size -->
-    <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
+    <!-- Buffer size to use when flushing memtables to disk.
+         (Only one memtable is ever flushed at a time.)
+         Increase (decrease) the index buffer size relative to the data buffer
+         if you have few (many) columns per key. -->
+    <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
+    <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
+
+    <!-- Add column indexes to a row after its contents reach this size.
+         Increase if your column values are large, or if you have a very large
+         number of columns.  There are two competing concerns: Cassandra has
+         to deserialize this much of the row to read a single column, so you
+         want it to be small - at least if you do many partial-row reads
+         - but all the index data is read for each access, so
+         you don't want to generate it wastefully either. -->
+    <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
 
     <!--
-      The maximum amount of data to store in memory before flushing to
+      The maximum amount of data to store in memory per ColumnFamily before flushing to
       disk. Note: There is one memtable per column family, and this threshold
       is based solely on the amount of data stored, not actual heap memory
       usage (there is some overhead in indexing the columns).
     -->
-    <MemtableSizeInMB>32</MemtableSizeInMB>
-
+    <MemtableSizeInMB>64</MemtableSizeInMB>
     <!--
-      The maximum number of columns in millions to store in memory
+      The maximum number of columns in millions to store in memory per ColumnFamily
       before flushing to disk.  This is also a per-memtable setting.
       Use with MemtableSizeInMB to tune memory usage.
     -->
-    <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
+    <MemtableObjectCountInMillions>0.1</MemtableObjectCountInMillions>
 
     <!-- Unlike most systems, in Cassandra writes are faster than
          reads, so you can afford more of those in parallel.
 	 A good rule of thumb is 2 concurrent reads per processor core.
-         You especially want more concurrentwrites if you are using
-         CommitLogSync + CommitLogSyncDelay. -->
+         Increase ConcurrentWrites to the number of clients writing
+         at once if you enable CommitLogSync + CommitLogSyncDelay. -->
     <ConcurrentReads>8</ConcurrentReads>
     <ConcurrentWrites>32</ConcurrentWrites>
 

Modified: incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
(original)
+++ incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
Fri Aug  7 20:57:02 2009
@@ -67,6 +67,9 @@
     private static int consistencyThreads_ = 4; // not configurable
     private static int concurrentReaders_ = 8;
     private static int concurrentWriters_ = 32;
+
+    private static int flushDataBufferSizeInMB_ = 32;
+    private static int flushIndexBufferSizeInMB_ = 32;
     private static List<String> tables_ = new ArrayList<String>();
     private static Set<String> applicationColumnFamilies_ = new HashSet<String>();
 
@@ -224,6 +227,17 @@
                 concurrentWriters_ = Integer.parseInt(rawWriters);
             }
 
+            String rawFlushData = xmlUtils.getNodeValue("/Storage/FlushDataBufferSizeInMB");
+            if (rawFlushData != null)
+            {
+                flushDataBufferSizeInMB_ = Integer.parseInt(rawFlushData);
+            }
+            String rawFlushIndex = xmlUtils.getNodeValue("/Storage/FlushIndexBufferSizeInMB");
+            if (rawFlushIndex != null)
+            {
+                flushIndexBufferSizeInMB_ = Integer.parseInt(rawFlushIndex);
+            }
+
             /* TCP port on which the storage system listens */
             String port = xmlUtils.getNodeValue("/Storage/StoragePort");
             if ( port != null )
@@ -909,4 +923,14 @@
     {
         return commitLogSync_;
     }
+
+    public static int getFlushDataBufferSizeInMB()
+    {
+        return flushDataBufferSizeInMB_;
+    }
+
+    public static int getFlushIndexBufferSizeInMB()
+    {
+        return flushIndexBufferSizeInMB_;
+    }
 }

Modified: incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java (original)
+++ incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java Fri Aug
 7 20:57:02 2009
@@ -11,6 +11,7 @@
 
 import org.apache.cassandra.dht.IPartitioner;
 import org.apache.cassandra.utils.BloomFilter;
+import org.apache.cassandra.config.DatabaseDescriptor;
 import com.reardencommerce.kernel.collections.shared.evictable.ConcurrentLinkedHashMap;
 
 public class SSTableWriter extends SSTable
@@ -26,8 +27,8 @@
     public SSTableWriter(String filename, int keyCount, IPartitioner partitioner) throws
IOException
     {
         super(filename, partitioner);
-        dataFile = new BufferedRandomAccessFile(path, "rw", 4 * 1024 * 1024);
-        indexFile = new BufferedRandomAccessFile(indexFilename(), "rw", 1024 * 1024);
+        dataFile = new BufferedRandomAccessFile(path, "rw", DatabaseDescriptor.getFlushDataBufferSizeInMB()
* 1024 * 1024);
+        indexFile = new BufferedRandomAccessFile(indexFilename(), "rw", DatabaseDescriptor.getFlushIndexBufferSizeInMB()
* 1024 * 1024);
         bf = new BloomFilter(keyCount, 15);
     }
 

Modified: incubator/cassandra/trunk/test/system/stress.py
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/test/system/stress.py?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/test/system/stress.py (original)
+++ incubator/cassandra/trunk/test/system/stress.py Fri Aug  7 20:57:02 2009
@@ -30,7 +30,7 @@
         self.count = 0
         client = get_client(port=9160)
         client.transport.open()
-        for i in xrange(0, 1000):
+        for i in xrange(0, 200):
             data = md5(str(i)).hexdigest()
             for j in xrange(0, 1000):
                 key = '%s.%s.%s' % (time.time(), id, j)



Mime
View raw message