incubator-cassandra-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "John R. Frank" <...@mit.edu>
Subject Re: smallest/largest UUIDs for LexicalUUIDType
Date Sat, 08 Jun 2013 01:30:45 GMT
> Follow-up question:  it seems that range queries on the *second* field 
> of a CompositeType(UUIDType(), UUIDType()) do not work.

If I concatenate the two UUID.hex values into a 32-character string 
instead of a CompositeType of two UUIDs, then range queries work 
correctly.

This is illustrated below... so the question is:  what is the point of a 
CompositeType if range queries only work on the first field?  Is it just a 
convenience class for keeping things strongly typed and cleanly organized, 
or did I break something in the way I set up CompositeType in the example 
earlier in this thread?


def join_uuids(*uuids):
     """Concatenate the ``hex`` attribute of every argument into one string.

     The pieces are joined in argument order with no separator; calling
     with no arguments yields the empty string.
     """
     hex_parts = [one_uuid.hex for one_uuid in uuids]
     return ''.join(hex_parts)

def split_uuids(uuid_str):
     """Split a string of concatenated UUID hex forms back into UUIDs.

     Each consecutive run of 32 characters is parsed with
     ``uuid.UUID(hex=...)``.

     :param uuid_str: string (anything supporting ``len`` and slicing)
         whose length is a multiple of 32
     :returns: a list of ``uuid.UUID`` objects in order of appearance;
         a real list, so callers can index it on any Python version
     :raises ValueError: if the length is not a multiple of 32, or a
         chunk is not valid hexadecimal
     """
     hex_len = 32
     total = len(uuid_str)
     # Reject a trailing partial chunk up front with a clear message.
     if total % hex_len:
          raise ValueError(
               'expected a multiple of %d hex characters, got %d'
               % (hex_len, total))
     return [uuid.UUID(hex=uuid_str[pos:pos + hex_len])
             for pos in range(0, total, hex_len)]

def grouper(iterable, n, fillvalue=None):
     """Collect data into fixed-length chunks or blocks.

     grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx

     :param iterable: any iterable to be chunked
     :param n: number of items per chunk
     :param fillvalue: value used to pad the final chunk when the input
         length is not a multiple of ``n``
     :returns: an iterator of ``n``-tuples
     """
     # The function is named izip_longest on Python 2 and zip_longest on
     # Python 3; pick whichever this interpreter provides.
     try:
          zip_longest = itertools.zip_longest
     except AttributeError:
          zip_longest = itertools.izip_longest
     # n references to the SAME iterator: each output tuple pulls n
     # consecutive items from the input.
     args = [iter(iterable)] * n
     return zip_longest(*args, fillvalue=fillvalue)

def test_composite_column_names_second_level_range_query_with_decomposited_keys():
     '''
     Check that we can execute range queries on the second part of a
     two-UUID column name after we unpack the composite key into a
     long string of concatenated hex forms of the UUIDs.

     The column family uses a UTF8Type comparator and the column names
     are built with join_uuids / parsed back with split_uuids.

     The SystemManager connection is closed in a ``finally`` clause so
     that a failing assertion or request does not leave it open.
     '''
     sm = SystemManager(chosen_server)
     try:
          sm.create_keyspace(namespace, SIMPLE_STRATEGY,
                             {'replication_factor': '1'})

          family = 'test'
          sm.create_column_family(
               namespace, family, super=False,
               key_validation_class=ASCII_TYPE,
               default_validation_class=BYTES_TYPE,
               comparator_type=UTF8Type(),
               )
          pool = ConnectionPool(namespace, config['storage_addresses'],
                                max_retries=1000, pool_timeout=10,
                                pool_size=2, timeout=120)

          cf = pycassa.ColumnFamily(pool, family)
          u1, u2, u3, u4 = (uuid.uuid1(), uuid.uuid1(),
                            uuid.uuid1(), uuid.uuid1())

          # Three columns in one row, sharing the first UUID.
          cf.insert('inbound', {join_uuids(u1, u2): b''})
          cf.insert('inbound', {join_uuids(u1, u3): b''})
          cf.insert('inbound', {join_uuids(u1, u4): b''})

          ## test range searching: a window of +/-1 around u3 should
          ## contain exactly the u3 column
          start = uuid.UUID(int=u3.int - 1)
          finish = uuid.UUID(int=u3.int + 1)
          assert start.int < u3.int < finish.int
          rec3 = cf.get('inbound',
                        column_start=join_uuids(u1, start),
                        column_finish=join_uuids(u1, finish)).items()
          assert len(rec3) == 1
          assert split_uuids(rec3[0][0])[1] == u3
          ####  This assert above passes!

          ## NOTE(review): the original comment here said "This next part
          ## fails"; per the note on the last assert below it passes with
          ## concatenated keys and fails only in the CompositeType variant.
          ## now insert many rows -- enough that some should fall in each
          ## subrange below
          for i in range(1000):
               cf.insert('inbound', {join_uuids(u1, uuid.uuid4()): b''})

          ## do four ranges, and expect more than zero in each
          step_size = 2**(128 - 2)
          for i in range(2**2, 0, -1):
               start = uuid.UUID(int=(i - 1) * step_size)
               # clamp: 4 * step_size == 2**128 is one past the largest UUID
               finish = uuid.UUID(int=min(i * step_size, 2**128 - 1))
               recs = cf.get('inbound',
                             column_start=join_uuids(u1, start),
                             column_finish=join_uuids(u1, finish)).items()
               for key, val in recs:
                    key = split_uuids(key)
                    assert val == b''
                    assert key[0] == u1
                    assert key[1] < finish
                    assert start < key[1]   ## this passes!! (fails with CompositeType...)

               assert len(recs) > 0
               # One pre-formatted string gives the same output as the
               # original comma-separated print statement.
               print('%d  for  %s %s' % (len(recs), start, finish))
     finally:
          sm.close()

Mime
View raw message