cassandra-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From David Boxenhorn <da...@lookin2.com>
Subject Re: Tuning Cassandra
Date Mon, 10 May 2010 08:47:43 GMT
You asked for it! You might want to skip to Cassandra.save(....) ...

public class BuildCassandraDB
{
   private static void importContentInterests(Connection connection) throws
Exception
   {
      Statement statement;
      ResultSet resultSet;
      String sql;
      int count, totalcount;

      /*
       * Select Contents/Categories Map, Categories/Interests Map,
       * Where categories are equal, order by Contents, save Interests to
Contents
       */
      sql = "select content, interest, name " +
            "from contents_attractions_map, attractions_categories_map,
categories_interests_map, interests " +
            "where contents_attractions_map.attraction =
attractions_categories_map.attraction and " +
            "attractions_categories_map.category =
categories_interests_map.category and " +
            "categories_interests_map.interest = interests.id " +
            "order by contents_attractions_map.content";

      statement = connection.createStatement();
      resultSet = statement.executeQuery(sql);

      HashMap<Integer, ContentCass> contentMap = new HashMap<Integer,
ContentCass>();

      count = 0;

      while (resultSet.next())
      {
          Integer contentId = resultSet.getInt("CONTENT");
          Integer interestId = resultSet.getInt("INTEREST");
          String interestName = resultSet.getString("NAME");

          ContentCass contentCass = contentMap.get(contentId);

          if (contentCass == null)
          {
             contentCass = new ContentCass(contentId.toString(), false);
             contentMap.put(contentId, contentCass);
             count++;
          }

          contentCass.addInterest(interestId.toString(), interestName);

          if (count >= 1000)
          {
             for (ContentCass saveContentCass : contentMap.values())
                saveContentCass.save();

             contentMap.clear();
             count = 0;
          }
      }

      for (ContentCass saveContentCass : contentMap.values())
         saveContentCass.save();

      contentMap.clear();

      resultSet.close();
   }
}

/**
 * Entity for one row of the "Content" super column family.
 *
 * Scalar properties live as columns of the "data" super column; associations
 * (attractions, categories, interests, geo sites) each live in their own super
 * column, with the associated id as column name and its name as column value.
 */
public class ContentCass extends BaseCass
{
   /* Super column that holds the scalar properties */
   private static final String DATA = "data";

   /* Super columns that hold the id/name associations */
   private static final String ATTRACTIONS = "attractions";
   private static final String CATEGORIES = "categories";
   private static final String INTERESTS = "interests";
   private static final String GEO_SITES = "geoSites";

   /**
    * @param rowKey         key of the content row
    * @param populateEntity passed through to the base class constructor
    * @throws CassException if the base class constructor fails
    */
   public ContentCass(String rowKey, boolean populateEntity) throws CassException
   {
      super(rowKey, populateEntity);
   }

   @Override
   public String getColumnFamily()
   {
      return "Content";
   }

   @Override
   public boolean isSuper()
   {
      return true;
   }

   /*
    * Internal helpers
    */

   /* Stores one scalar property in the "data" super column */
   private void writeData(String column, String value)
   {
      setVal(DATA, column, value);
   }

   /* Reads one scalar property from the "data" super column */
   private String readData(String column)
   {
      return (String)getVal(DATA, column);
   }

   /* Reads the value stored under a top-level key as a tuple set */
   private TupleSet<String> readSet(String superColumn)
   {
      return (TupleSet<String>)getVal(superColumn);
   }

   /*
    * Setters
    */
   public void setDocument(String document)
   {
      writeData("document", document);
   }

   public void setStrippedChecksum(String strippedChecksum)
   {
      writeData("strippedChecksum", strippedChecksum);
   }

   public void setStrippedDuplicate(String strippedDuplicate)
   {
      writeData("strippedDuplicate", strippedDuplicate);
   }

   public void setText(String text)
   {
      writeData("text", text);
   }

   public void setAuthor(String author)
   {
      writeData("author", author);
   }

   public void setReferringContent(String referringContent)
   {
      writeData("referringContent", referringContent);
   }

   public void setStatus(String status)
   {
      writeData("status", status);
   }

   public void setVersion(String version)
   {
      writeData("version", version);
   }

   public void setSentiment(String sentiment)
   {
      writeData("sentiment", sentiment);
   }

   public void setCreated(String created)
   {
      writeData("created", created);
   }

   public void setSnippet(String snippet)
   {
      writeData("snippet", snippet);
   }

   public void setTitle(String title)
   {
      writeData("title", title);
   }

   public void setDocumentURL(String documentURL)
   {
      writeData("documentURL", documentURL);
   }

   public void setSourceType(String sourceType)
   {
      writeData("sourceType", sourceType);
   }

   /*
    * Associations: column name is the id, column value is the name
    */
   public void addAttraction(String attractionId, String attractionName)
   {
      setVal(ATTRACTIONS, attractionId, attractionName);
   }

   public void addCategory(String categoryId, String categoryName)
   {
      setVal(CATEGORIES, categoryId, categoryName);
   }

   public void addInterest(String interestId, String interestName)
   {
      setVal(INTERESTS, interestId, interestName);
   }

   public void addGeoSite(String geoSiteId, String geoSiteName)
   {
      setVal(GEO_SITES, geoSiteId, geoSiteName);
   }

   /*
    * Getters
    */
   public String getDocument()
   {
      return readData("document");
   }

   public String getStrippedChecksum()
   {
      return readData("strippedChecksum");
   }

   public String getStrippedDuplicate()
   {
      return readData("strippedDuplicate");
   }

   public String getText()
   {
      return readData("text");
   }

   public String getAuthor()
   {
      return readData("author");
   }

   public String getReferringContent()
   {
      return readData("referringContent");
   }

   public String getStatus()
   {
      return readData("status");
   }

   public String getVersion()
   {
      return readData("version");
   }

   public String getSentiment()
   {
      return readData("sentiment");
   }

   public String getCreated()
   {
      return readData("created");
   }

   public String getSnippet()
   {
      return readData("snippet");
   }

   public String getTitle()
   {
      return readData("title");
   }

   public String getDocumentURL()
   {
      return readData("documentURL");
   }

   public String getSourceType()
   {
      return readData("sourceType");
   }

   public TupleSet<String> getAttractions()
   {
      return readSet(ATTRACTIONS);
   }

   public TupleSet<String> getCategories()
   {
      return readSet(CATEGORIES);
   }

   public TupleSet<String> getInterests()
   {
      return readSet(INTERESTS);
   }

   public TupleSet<String> getGeoSites()
   {
      return readSet(GEO_SITES);
   }
}

package com.lookin2.cassandra.entities;

import java.util.Collection;

import com.lookin2.cassandra.*;
import com.lookin2.cassandra.CAO.*;

/**
 * Base class for entities backed by one Cassandra row.
 *
 * The row is held as a {@link Tuple} whose key is the row key and whose value
 * is a {@link TupleSet}. For a standard column family the set holds one tuple
 * per column; for a super column family it holds one tuple per super column,
 * each of which has a nested {@link TupleSet} of columns as its value.
 */
public abstract class BaseCass
{
   private final Tuple<String> rowTuple;

   /*
    *  The name of the column family
    */
   public abstract String getColumnFamily();

   /*
    * Whether the column family is super
    */
   public abstract boolean isSuper();

   /**
    * @param rowKey         key of the row this entity represents
    * @param populateEntity if true the row is loaded through {@link #populate};
    *                       otherwise the entity starts with an empty tuple set
    * @throws CassException if loading the row fails
    */
   protected BaseCass(String rowKey, boolean populateEntity) throws CassException
   {
      // NOTE(review): populate() calls the overridable getColumnFamily()/isSuper()
      // from the constructor; overrides must not depend on subclass fields.
      if (populateEntity)
         rowTuple = populate(rowKey);
      else
      {
         rowTuple = new Tuple<String>(rowKey);
         rowTuple.setVal(new TupleSet<String>());
      }
   }

   /**
    * Loads the row with the given key from this entity's column family.
    *
    * @throws CassException if the read fails
    */
   protected Tuple<String> populate(String rowKey) throws CassException
   {
      return Cassandra.get(getColumnFamily(), rowKey, isSuper());
   }

   /**
    * Writes this entity's row to its column family.
    *
    * @throws CassException if the write fails
    */
   public void save() throws CassException
   {
      Cassandra.save (getColumnFamily(), rowTuple);
   }

   /**
    * Finds the top-level tuple (column, or super column) with the given key.
    *
    * @param create if true, a missing tuple is created (with no value) and added
    * @return the tuple, or null if it is missing and create is false
    */
   private Tuple<String> getTuple(String key, boolean create)
   {
      TupleSet<String> columnTupleSet = (TupleSet<String>) rowTuple.getVal();

      Tuple<String> columnTuple = (Tuple<String>)columnTupleSet.get(key);

      if (create && columnTuple == null)
      {
         columnTuple = new Tuple<String>(key);
         columnTupleSet.add(columnTuple);
      }

      return columnTuple;
   }

   /**
    * Finds the column tuple with the given key inside the given super column.
    *
    * @param create if true, a missing super column and/or column is created and added
    * @return the column tuple, or null if it (or its super column) is missing
    *         and create is false
    */
   private Tuple<String> getTuple(String superKey, String key, boolean create)
   {
      TupleSet<String> superColumnTupleSet = (TupleSet<String>) rowTuple.getVal();

      Tuple<String> superColumnTuple = (Tuple<String>)superColumnTupleSet.get(superKey);

      TupleSet<String> columnTupleSet;

      if (superColumnTuple == null)
      {
         if (!create)
            return null;

         columnTupleSet = new TupleSet<String>();
         superColumnTuple = new Tuple<String>(superKey, columnTupleSet);
         superColumnTupleSet.add(superColumnTuple);
      }
      else
         columnTupleSet = (TupleSet<String>)superColumnTuple.getVal();

      Tuple<String> columnTuple = (Tuple<String>)columnTupleSet.get(key);

      if (create && columnTuple == null)
      {
         columnTuple = new Tuple<String>(key);
         columnTupleSet.add(columnTuple);
      }

      return columnTuple;
   }

   /**
    * Adds all tuples of the collection to the row's top-level tuple set.
    * Does nothing if either argument is null.
    */
   protected void setVal(String key, Collection<Tuple<? extends Comparable<String>>> tupleCollection)
   {
      if (key == null || tupleCollection == null)
         return;

      // NOTE(review): key is only null-checked; the tuples are added at row level,
      // not under a super column named key. Confirm whether that is intended.
      TupleSet<String> superColumnTupleSet = (TupleSet<String>) rowTuple.getVal();
      superColumnTupleSet.addAll(tupleCollection);
   }

   /**
    * Sets the value of the top-level tuple with the given key, creating it if
    * needed. Does nothing if either argument is null.
    */
   protected void setVal(String key, Object val)
   {
      if (key == null || val == null)
         return;

      getTuple(key, true).setVal(val);
   }

   /**
    * Sets the value of a column inside a super column, creating both if needed.
    * Does nothing if any argument is null.
    */
   protected void setVal(String superKey, String key, Object val)
   {
      if (superKey == null || key == null || val == null)
         return;

      getTuple(superKey, key, true).setVal(val);
   }

   /**
    * Sets the value of the top-level tuple with the given key to null, creating
    * the tuple if needed. Does nothing if key is null.
    */
   protected void delete(String key)
   {
      if (key == null)
         return;

      getTuple(key, true).setVal(null);
   }

   /**
    * Sets the value of a column inside a super column to null, creating both if
    * needed. Does nothing if either argument is null.
    */
   protected void delete(String superKey, String key)
   {
      if (superKey == null || key == null)
         return;

      getTuple(superKey, key, true).setVal(null);
   }

   protected Tuple<String> getRow()
   {
      return rowTuple;
   }

   /**
    * @return the value of the top-level tuple with the given key, or null if
    *         there is no such tuple
    */
   protected Object getVal(String key)
   {
      Tuple<String> tuple = getTuple(key, false);

      return tuple == null ? null : tuple.getVal();
   }

   /**
    * @return the value of the given column inside the given super column, or
    *         null if the super column or the column does not exist
    */
   protected Object getVal(String superKey, String key)
   {
      // Must use the two-key lookup: the one-key lookup would search for key
      // among the super columns and ignore superKey entirely.
      Tuple<String> tuple = getTuple(superKey, key, false);

      return tuple == null ? null : tuple.getVal();
   }

   @Override
   public String toString()
   {
      return rowTuple.toString();
   }
}

package com.lookin2.cassandra.CAO;

import java.util.*;

import org.apache.cassandra.thrift.*;

import me.prettyprint.cassandra.service.*;

import com.lookin2.cassandra.*;

public class Cassandra
{
   private static final String KEYSPACE = "Lookin2";
   private static final String URL_PORT = "localhost:9160";
   private static CassandraClientPool clientPool = null;

   private static synchronized CassandraClientPool getClientPool()
   {
      if (clientPool == null)
         clientPool = CassandraClientPoolFactory.INSTANCE.get();

      return clientPool;
   }

   private static Keyspace getKeyspace() throws CassException
   {
      try
      {
         if (clientPool == null)
            clientPool = getClientPool();

         CassandraClient client = clientPool.borrowClient(URL_PORT);
         return client.getKeyspace(KEYSPACE, ConsistencyLevel.ONE);
      }
      catch (Exception e)
      {
         e.printStackTrace();
         throw new CassException(e);
      }
   }

   private static void releaseKeyspace(Keyspace keyspace) throws
CassException
   {
      try
      {
         clientPool.releaseKeyspace(keyspace);
      }
      catch (Exception e)
      {
         e.printStackTrace();
         throw new CassException(e);
      }
   }

   public static Tuple<String> get(String columnFamily, String rowKey,
boolean isSuper) throws CassException
   {
      Keyspace keyspace = getKeyspace();

      ColumnParent columnParent = new ColumnParent(columnFamily);

      SliceRange sliceRange = new SliceRange();

      sliceRange.setStart(new byte[]{});
      sliceRange.setFinish(new byte[]{});

      SlicePredicate slicePredicate = new SlicePredicate();

      slicePredicate.setSlice_range(sliceRange);

      try
      {
         if (isSuper)
         {
            TupleSet<String> superColumnTupleSet = new TupleSet<String>();
            List<SuperColumn> superColumnList =
keyspace.getSuperSlice(rowKey, columnParent, slicePredicate);

            for (SuperColumn superColumn : superColumnList)
            {
               TupleSet<String> columnTupleSet = new TupleSet<String>();
               List<Column> columnList = superColumn.getColumns();

               for (Column column : columnList)
                  columnTupleSet.add(new String(column.getName()), new
String(column.getValue()));

               superColumnTupleSet.add(new String(superColumn.getName()),
columnTupleSet);
            }

            return new Tuple<String>(rowKey, superColumnTupleSet);
         }
         else
         {
            TupleSet<String> columnTupleSet = new TupleSet<String>();
            List<Column> columnList = keyspace.getSlice(rowKey,
columnParent, slicePredicate);

            for (Column column : columnList)
               columnTupleSet.add(new String(column.getName()), new
String(column.getValue()));

            return new Tuple<String>(rowKey, columnTupleSet);
         }
      }
      catch (Exception e)
      {
         throw new CassException(e);
      }
   }

   public static void save (String columnFamily, Tuple<?> rowTuple) throws
CassException
   {
      save (columnFamily, null, rowTuple);
   }

   public static void save (String columnFamily, TupleSet<?> rowTupleSet)
throws CassException
   {
      save (columnFamily, rowTupleSet, null);
   }

   private static void save(String columnFamily, TupleSet<?> rowTupleSet,
Tuple<?> rowTuple) throws CassException
   {
      BatchMutation batch;

      if (rowTupleSet != null)
         batch = buildBatchMutation(columnFamily, rowTupleSet);
      else if (rowTuple != null)
         batch = buildBatchMutation(columnFamily, rowTuple);
      else
         throw new CassException("Both rowTupleSet and rowTuple are null");

      try
      {
         Keyspace keyspace = getKeyspace();

         keyspace.batchMutate(batch);

         batch = null;

         releaseKeyspace(keyspace);
      }
      catch (Exception e)
      {
         throw new CassException(e);
      }

      Runtime.getRuntime().gc();
   }

   private static BatchMutation buildBatchMutation(String columnFamily,
Tuple<?> rowTuple)
   {
      BatchMutation batch = new BatchMutation();

      buildBatchMutation(batch, columnFamily, rowTuple);

      return batch;
   }

   private static BatchMutation buildBatchMutation(String columnFamily,
TupleSet<?> rowTupleSet)
   {
      BatchMutation batch = new BatchMutation();

      for (Tuple<?> rowTuple : rowTupleSet)
         buildBatchMutation(batch, columnFamily, rowTuple);

      return batch;
   }

   private static void buildBatchMutation(BatchMutation batch, String
columnFamily, Tuple<?> rowTuple)
   {
      TupleSet<?> rowValTupleSet = (TupleSet<?>) rowTuple.getVal();

      for (Tuple<?> superColumnTuple : rowValTupleSet) // Loop through the
columns of the row
      {
         Object val = superColumnTuple.getVal();

         if (val instanceof TupleSet<?>) // If the value is a TupleSet, then
this is a supertable
         {
            TupleSet<?> superColumnTupleSet = (TupleSet<?>)val;

            for (Tuple<?> columnTuple : superColumnTupleSet) // Loop over
the superColumns
            {
               buildBatchMutation(batch, columnFamily, rowTuple,
superColumnTuple, columnTuple);
            }
         }
         else
         {
            Tuple<?> columnTuple = (Tuple<?>)val;
            buildBatchMutation(batch, columnFamily, rowTuple,  null,
columnTuple);
         }
      }
   }

   private static void buildBatchMutation(BatchMutation batch,
                                          String columnFamily, Tuple<?>
rowTuple,  Tuple<?> superColumnTuple, Tuple<?> columnTuple)
   {
      if (!columnTuple.isUpdated())
         return;

      String key = rowTuple.getKeyString();
      List<String> columnFamilies = new ArrayList<String>();

      columnFamilies.add(columnFamily);

      if (columnTuple.getVal() == null)
      {
         Deletion deletion = new Deletion();
         SlicePredicate slicePredicate = new SlicePredicate();
         List<byte[]> columnList = new ArrayList<byte[]>();

         columnList.add(columnTuple.getKeyBytes());
         slicePredicate.setColumn_names(columnList);

         if (superColumnTuple != null)
            deletion.setSuper_column(superColumnTuple.getKeyBytes());

         deletion.predicate = slicePredicate;

         batch.addDeletion(key, columnFamilies, deletion);
      }
      else
      {
         Column column = new Column();

         column.setName(columnTuple.getKeyBytes());
         column.setValue(columnTuple.getValBytes());
         column.setTimestamp(System.currentTimeMillis() * 1000);

         if (superColumnTuple == null)
            batch.addInsertion(key, columnFamilies, column);
         else
         {
            SuperColumn superColumn = new SuperColumn();
            List<Column> columns = new ArrayList<Column>();

            columns.add(column);
            superColumn.setColumns(columns);
            superColumn.setName(superColumnTuple.getKeyBytes());

            batch.addSuperInsertion(key, columnFamilies, superColumn);
         }
      }
   }
}



On Mon, May 10, 2010 at 11:27 AM, vd <vineetdaniel@gmail.com> wrote:

> What is the complete code you are using to connect to Cassandra
> from Java?
>
>
>
>
> On Mon, May 10, 2010 at 1:49 PM, David Boxenhorn <david@lookin2.com>wrote:
>
>> I don't know what "TSocket or the buffered one" means. Maybe I should
>> know?
>>
>> I'm using Hector. Does that explain anything?
>>
>> On Mon, May 10, 2010 at 11:15 AM, vd <vineetdaniel@gmail.com> wrote:
>>
>>>
>>> Hi
>>>
>>> What are you using to connect to Cassandra: TSocket or the
>>> buffered one?
>>>
>>>
>>> ____________________________________
>>>
>>> _______________________________________
>>>
>>>
>>>
>>>
>>> On Mon, May 10, 2010 at 1:29 PM, David Boxenhorn <david@lookin2.com>wrote:
>>>
>>>> I'm running Java on the client, jdbc queries on Oracle, Hector on
>>>> Cassandra.
>>>>
>>>> The Cassandra and Oracle database designs are radically different, as
>>>> you might guess.
>>>>
>>>> I have no doubt that Cassandra can be tuned, in a multiple-server
>>>> cluster, to have superior throughput (that's why I'm doing it!). But for
>>>> now, it's really frustrating my development effort that Cassandra is so
>>>> slow. Can't I get it up to twice as slow as Oracle in my configuration?
>>>>
>>>> On Mon, May 10, 2010 at 10:47 AM, vd <vineetdaniel@gmail.com> wrote:
>>>>
>>>>> Hi David
>>>>>
>>>>> If I may ask... how do you plan to import data from Oracle to Cassandra?
>>>>> As an answer: AFAIK, Cassandra's true ability comes into play when running
>>>>> on more than one machine. Also, please share how you are making the
>>>>> comparison, e.g. on writes or on reads from Cassandra.
>>>>>
>>>>>
>>>>>
>>>>> _______________________________________
>>>>> _______________________________________
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> On Mon, May 10, 2010 at 1:04 PM, David Boxenhorn <david@lookin2.com>wrote:
>>>>>
>>>>>> I'm running Oracle and Cassandra on my machine, trying to import my
>>>>>> data to Cassandra from Oracle.
>>>>>>
>>>>>> In my configuration Oracle is about ten times faster than Cassandra.
>>>>>> Cassandra has out-of-the-box tuning.
>>>>>>
>>>>>> I am new to Cassandra. How do I begin trying to tune it?
>>>>>>
>>>>>> Thanks.
>>>>>>
>>>>>
>>>>>
>>>>
>>>
>>
>

Mime
View raw message