cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Matthew F. Dennis (JIRA)" <j...@apache.org>
Subject [jira] Updated: (CASSANDRA-1230) Memory use grows extremely fast with super column families
Date Mon, 28 Jun 2010 19:15:50 GMT

     [ https://issues.apache.org/jira/browse/CASSANDRA-1230?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Matthew F. Dennis updated CASSANDRA-1230:
-----------------------------------------

    Description: 
I have a script that inserts about 1kB of key/values into 10k super columns each into 1k rows.
Or at least I tried to. I noticed that Cassandra's memory usage went up so fast that I was
only able to insert into a few dozen rows before my machine ran out of memory. When I use
regular column families Cassandra's memory usage seems pretty flat, so this seems an issue
specifically with super columns.

The test program is attached and copied below.

{code}
#!/usr/bin/env python
# Program to demonstrate a use case where Cassandra memory usage grows
# without bounds using super column family:
#  -  1 row  140 MB RES 1400 MB VIRT
#  -  5 rows 532        1600
#  - 10      580        1632
#  - 20      801        1775
#  - 40      958        2047
#  ...
#
# Stopping Cassandra and restarting makes it jump immediately to the same
# virtual memory usage. Resident memory size seems to be about
# half of the state prior to stopping.
# 
# _JAVA_OPTIONS: -Xms64m -Xmx1G
# Cassandra 0.6.2 with default storage-conf.xml on single node
# Ubuntu 10.04 64bit
# sun-java6
# pycassa 0.3.0

import uuid

import pycassa

def insert10k(cf, rowkey):
    """Write 10,000 super columns into row ``rowkey`` of ``cf``.

    Each super column is named after its decimal index and carries 26
    sub-columns.  A sub-column name is one lowercase letter followed by
    "bcdefghijklmnopqrstuvwxyz"; every sub-column value is '1234567890'.
    One ``cf.insert`` call is made per super column.
    """
    name_tail = "bcdefghijklmnopqrstuvwxyz"
    # "a" + name_tail is the full alphabet, so this yields the 26 names
    # "abcdefghijklmnopqrstuvwxyz", "bbcdefghijklmnopqrstuvwxyz", ...
    subcolumn_names = [head + name_tail for head in "a" + name_tail]
    payload = '1234567890'

    for index in xrange(10000):
        # A fresh mapping is built for every call.
        subcolumns = dict.fromkeys(subcolumn_names, payload)
        cf.insert(rowkey, {str(index): subcolumns})

def super_column():
    client = pycassa.connect()
    cf = pycassa.ColumnFamily(client, 'Keyspace1', 'Super1', super=True)

    i = 0
    while i < 1000:
        insert10k(cf, uuid.uuid4().hex)
        print i, 'inserted 10k'
        i += 1

# Run the reproduction only when this file is executed as a script.
if __name__ == '__main__':
    super_column()
{code}


  was:
I have a script that inserts about 1kB of key/values into 10k super columns each into 1k rows.
Or at least I tried to. I noticed that Cassandra's memory usage went up so fast that I was
only able to insert into a few dozen rows before my machine ran out of memory. When I use
regular column families Cassandra's memory usage seems pretty flat, so this seems an issue
specifically with super columns.

Here's the test program:

#!/usr/bin/env python
# Program to demonstrate a use case where Cassandra memory usage grows
# without bounds using super column family:
#  -  1 row  140 MB RES 1400 MB VIRT
#  -  5 rows 532        1600
#  - 10      580        1632
#  - 20      801        1775
#  - 40      958        2047
#  ...
#
# Stopping Cassandra and restarting makes it jump immediately to the same
# virtual memory usage. Resident memory size seems to be about
# half of the state prior to stopping.
# 
# _JAVA_OPTIONS: -Xms64m -Xmx1G
# Cassandra 0.6.2 with default storage-conf.xml on single node
# Ubuntu 10.04 64bit
# sun-java6
# pycassa 0.3.0

import uuid

import pycassa

def insert10k(cf, rowkey):
    """Write 10,000 super columns into row ``rowkey`` of ``cf``.

    Each super column is named after its decimal index and carries 26
    sub-columns.  A sub-column name is one lowercase letter followed by
    "bcdefghijklmnopqrstuvwxyz"; every sub-column value is '1234567890'.
    One ``cf.insert`` call is made per super column.
    """
    name_tail = "bcdefghijklmnopqrstuvwxyz"
    # "a" + name_tail is the full alphabet, so this yields the 26 names
    # "abcdefghijklmnopqrstuvwxyz", "bbcdefghijklmnopqrstuvwxyz", ...
    subcolumn_names = [head + name_tail for head in "a" + name_tail]
    payload = '1234567890'

    for index in xrange(10000):
        # A fresh mapping is built for every call.
        subcolumns = dict.fromkeys(subcolumn_names, payload)
        cf.insert(rowkey, {str(index): subcolumns})

def super_column():
    client = pycassa.connect()
    cf = pycassa.ColumnFamily(client, 'Keyspace1', 'Super1', super=True)

    i = 0
    while i < 1000:
        insert10k(cf, uuid.uuid4().hex)
        print i, 'inserted 10k'
        i += 1

# Run the reproduction only when this file is executed as a script.
if __name__ == '__main__':
    super_column()



> Memory use grows extremely fast with super column families
> ----------------------------------------------------------
>
>                 Key: CASSANDRA-1230
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-1230
>             Project: Cassandra
>          Issue Type: Bug
>    Affects Versions: 0.6
>         Environment: Single node Ubuntu 10.04 64 bit, sun-java6 from partner repositories,
using pycassa 0.3.0 to insert events.
>            Reporter: Heikki Toivonen
>            Priority: Critical
>             Fix For: 0.6.4
>
>         Attachments: supercolbug.py
>
>
> I have a script that inserts about 1kB of key/values into 10k super columns each into
1k rows. Or at least I tried to. I noticed that Cassandra's memory usage went up so fast that
I was only able to insert into a few dozen rows before my machine ran out of memory. When
I use regular column families Cassandra's memory usage seems pretty flat, so this seems an
issue specifically with super columns.
> test program is attached and copied below
> {code}
> #!/usr/bin/env python
> # Program to demonstrate a use case where Cassandra memory usage grows
> # without bounds using super column family:
> #  -  1 row  140 MB RES 1400 MB VIRT
> #  -  5 rows 532        1600
> #  - 10      580        1632
> #  - 20      801        1775
> #  - 40      958        2047
> #  ...
> #
> # Stopping Cassandra and restarting makes it jump immediately to the same
> # virtual memory usage. Resident memory size seems to be about
> # half of the state prior to stopping.
> # 
> # _JAVA_OPTIONS: -Xms64m -Xmx1G
> # Cassandra 0.6.2 with default storage-conf.xml on single node
> # Ubuntu 10.04 64bit
> # sun-java6
> # pycassa 0.3.0
> import uuid
> import pycassa
> def insert10k(cf, rowkey):
>     for i in xrange(10000):
>         cf.insert(rowkey, {
>                 str(i): {
>                     "abcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "bbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "cbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "dbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "ebcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "fbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "gbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "hbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "ibcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "jbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "kbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "lbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "mbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "nbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "obcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "pbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "qbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "rbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "sbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "tbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "ubcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "vbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "wbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "xbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "ybcdefghijklmnopqrstuvwxyz":'1234567890',
>                     "zbcdefghijklmnopqrstuvwxyz":'1234567890',
>                     },
>                 })    
> def super_column():
>     client = pycassa.connect()
>     cf = pycassa.ColumnFamily(client, 'Keyspace1', 'Super1', super=True)
>     i = 0
>     while i < 1000:
>         insert10k(cf, uuid.uuid4().hex)
>         print i, 'inserted 10k'
>         i += 1
> if __name__ == '__main__':
>     super_column()
> {code}

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


Mime
View raw message