cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pma...@apache.org
Subject svn commit: r749218 [32/34] - in /incubator/cassandra: branches/ dist/ nightly/ site/ tags/ trunk/ trunk/lib/ trunk/src/ trunk/src/org/ trunk/src/org/apache/ trunk/src/org/apache/cassandra/ trunk/src/org/apache/cassandra/analytics/ trunk/src/org/apache...
Date Mon, 02 Mar 2009 07:57:31 GMT
Added: incubator/cassandra/trunk/src/org/apache/cassandra/tools/KeyExtracter.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/tools/KeyExtracter.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/tools/KeyExtracter.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/tools/KeyExtracter.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.cassandra.io.DataInputBuffer;
+import org.apache.cassandra.io.DataOutputBuffer;
+import org.apache.cassandra.io.IFileReader;
+import org.apache.cassandra.io.SSTable;
+import org.apache.cassandra.io.SequenceFile;
+import org.apache.cassandra.io.SSTable.KeyPositionInfo;
+import org.apache.cassandra.utils.BasicUtilities;
+
+
+public class KeyExtracter
+{
+    private static final int bufferSize_ = 64*1024;
+
+    public static void main(String[] args) throws Throwable
+    {
+        if ( args.length != 3 )
+        {
+            System.out.println("Usage : java com.facebook.infrastructure.tools.IndexBuilder <key to extract> <data file> <output file>");
+            System.exit(1);
+        }
+		String keyToExtract = args[0];
+		String dataFile = args[1];
+		String outputFile = args[2];
+
+        extractKeyIntoFile(keyToExtract, dataFile, outputFile);
+    }
+
+    public static boolean extractKeyIntoFile(String keyToExtract, String dataFile, String outputFile) throws IOException
+    {
+		IFileReader dataReader = SequenceFile.bufferedReader(dataFile, bufferSize_);
+        DataOutputBuffer bufOut = new DataOutputBuffer();
+        DataInputBuffer bufIn = new DataInputBuffer();
+
+    	try
+    	{
+            while ( !dataReader.isEOF() )
+            {
+                bufOut.reset();
+                dataReader.next(bufOut);
+                bufIn.reset(bufOut.getData(), bufOut.getLength());
+                /* Key just read */
+                String key = bufIn.readUTF();
+                /* check if we want this key */
+                if ( key.equals(keyToExtract) )
+                {
+                	int keySize = bufIn.readInt();
+                	byte[] keyData = new byte[keySize];
+                	bufIn.read(keyData, 0, keySize);
+
+                	/* write the key data into a file */
+                    RandomAccessFile raf = new RandomAccessFile(outputFile, "rw");                	
+                	raf.writeUTF(key);
+                	raf.writeInt(keySize);
+                	raf.write(keyData);
+                    dumpBlockIndex(keyToExtract, 0L, keySize, raf);
+                    raf.close();
+                    return true;
+                }
+            }
+        }
+        finally
+        {
+            dataReader.close();
+        }
+
+        return false;
+    }
+    
+    private static void dumpBlockIndex(String key, long position, long size, RandomAccessFile raf) throws IOException
+    {
+        DataOutputBuffer bufOut = new DataOutputBuffer();                       
+        /* Number of keys in this block */
+        bufOut.writeInt(1);
+        bufOut.writeUTF(key);
+        bufOut.writeLong(position);
+        bufOut.writeLong(size);
+        
+        /* Write out the block index. */
+        raf.writeUTF(SSTable.blockIndexKey_);
+        raf.writeInt(bufOut.getLength());
+        raf.write(bufOut.getData(), 0, bufOut.getLength());
+    }
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleaner.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleaner.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleaner.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleaner.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+import java.math.BigInteger;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.cassandra.io.ICompactSerializer;
+import org.apache.cassandra.net.EndPoint;
+import org.apache.cassandra.net.Message;
+import org.apache.cassandra.net.MessagingService;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.FBUtilities;
+import org.apache.cassandra.io.*;
+import org.apache.cassandra.utils.*;
+
+public class MembershipCleaner
+{
+    private static final int port_ = 7000;
+    private static final long waitTime_ = 10000;
+    
+    public static void main(String[] args) throws Throwable
+    {
+        if ( args.length != 3 )
+        {
+            System.out.println("Usage : java com.facebook.infrastructure.tools.MembershipCleaner " +
+                    "<ip:port to send the message> " +
+                    "<node which needs to be removed> " +
+                    "<file containing all nodes in the cluster>");
+            System.exit(1);
+        }
+        
+        String ipPort = args[0];
+        String node = args[1];
+        String file = args[2];
+        
+        String[] ipPortPair = ipPort.split(":");
+        EndPoint target = new EndPoint(ipPortPair[0], Integer.valueOf(ipPortPair[1]));
+        MembershipCleanerMessage mcMessage = new MembershipCleanerMessage(node);
+        
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream dos = new DataOutputStream(bos);
+        MembershipCleanerMessage.serializer().serialize(mcMessage, dos);
+        /* Construct the token update message to be sent */
+        Message mbrshipCleanerMessage = new Message( new EndPoint(FBUtilities.getHostName(), port_), "", StorageService.mbrshipCleanerVerbHandler_, new Object[]{bos.toByteArray()} );
+        
+        BufferedReader bufReader = new BufferedReader( new InputStreamReader( new FileInputStream(file) ) );
+        String line = null;
+       
+        while ( ( line = bufReader.readLine() ) != null )
+        {            
+            mbrshipCleanerMessage.addHeader(line, line.getBytes());
+        }
+        
+        System.out.println("Sending a membership clean message to " + target);
+        MessagingService.getMessagingInstance().sendOneWay(mbrshipCleanerMessage, target);
+        Thread.sleep(MembershipCleaner.waitTime_);
+        System.out.println("Done sending the update message");
+    }
+    
+    public static class MembershipCleanerMessage implements Serializable
+    {
+        private static ICompactSerializer<MembershipCleanerMessage> serializer_;
+        private static AtomicInteger idGen_ = new AtomicInteger(0);
+        
+        static
+        {
+            serializer_ = new MembershipCleanerMessageSerializer();            
+        }
+        
+        static ICompactSerializer<MembershipCleanerMessage> serializer()
+        {
+            return serializer_;
+        }
+
+        private String target_;
+        
+        MembershipCleanerMessage(String target)
+        {
+            target_ = target;        
+        }
+        
+        String getTarget()
+        {
+            return target_;
+        }
+    }
+    
+    public static class MembershipCleanerMessageSerializer implements ICompactSerializer<MembershipCleanerMessage>
+    {
+        public void serialize(MembershipCleanerMessage mcMessage, DataOutputStream dos) throws IOException
+        {            
+            dos.writeUTF(mcMessage.getTarget() );                      
+        }
+        
+        public MembershipCleanerMessage deserialize(DataInputStream dis) throws IOException
+        {            
+            return new MembershipCleanerMessage(dis.readUTF());
+        }
+    }
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleanerVerbHandler.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleanerVerbHandler.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleanerVerbHandler.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/tools/MembershipCleanerVerbHandler.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.util.*;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigInteger;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.gms.Gossiper;
+import org.apache.cassandra.io.DataInputBuffer;
+import org.apache.cassandra.net.EndPoint;
+import org.apache.cassandra.net.IVerbHandler;
+import org.apache.cassandra.net.Message;
+import org.apache.cassandra.net.MessagingService;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.tools.TokenUpdater.TokenInfoMessage;
+import org.apache.cassandra.utils.LogUtil;
+import org.apache.log4j.Logger;
+import org.apache.cassandra.io.*;
+import org.apache.cassandra.config.*;
+
+/**
+ * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
+ */
+
+public class MembershipCleanerVerbHandler implements IVerbHandler
+{
+    private static Logger logger_ = Logger.getLogger(MembershipCleanerVerbHandler.class);
+
+    public void doVerb(Message message)
+    {
+        byte[] body = (byte[])message.getMessageBody()[0];
+        
+        try
+        {
+            DataInputBuffer bufIn = new DataInputBuffer();
+            bufIn.reset(body, body.length);
+            /* Deserialize to get the token for this endpoint. */
+            MembershipCleaner.MembershipCleanerMessage mcMessage = MembershipCleaner.MembershipCleanerMessage.serializer().deserialize(bufIn);
+            
+            String target = mcMessage.getTarget();
+            logger_.info("Removing the node [" + target + "] from membership");
+            EndPoint targetEndPoint = new EndPoint(target, DatabaseDescriptor.getControlPort());
+            /* Remove the token related information for this endpoint */
+            StorageService.instance().removeTokenState(targetEndPoint);
+            
+            /* Get the headers for this message */
+            Map<String, byte[]> headers = message.getHeaders();
+            headers.remove( StorageService.getLocalStorageEndPoint().getHost() );
+            logger_.debug("Number of nodes in the header " + headers.size());
+            Set<String> nodes = headers.keySet();
+            
+            for ( String node : nodes )
+            {            
+                logger_.debug("Processing node " + node);
+                byte[] bytes = headers.remove(node);
+                /* Send a message to this node to alter its membership state. */
+                EndPoint targetNode = new EndPoint(node, DatabaseDescriptor.getStoragePort());                
+                
+                logger_.debug("Sending a membership clean message to " + targetNode);
+                MessagingService.getMessagingInstance().sendOneWay(message, targetNode);
+                break;
+            }                        
+        }
+        catch( IOException ex )
+        {
+            logger_.debug(LogUtil.throwableToString(ex));
+        }
+    }
+
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/tools/ThreadListBuilder.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/tools/ThreadListBuilder.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/tools/ThreadListBuilder.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/tools/ThreadListBuilder.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.cassandra.io.DataOutputBuffer;
+import org.apache.cassandra.utils.BloomFilter;
+
+
+public class ThreadListBuilder
+{
+    private final static int bufSize_ = 64*1024*1024;
+    private final static int count_ = 128*1024*1024;
+    
+    public static void main(String[] args) throws Throwable
+    {
+        if ( args.length != 2 )
+        {
+            System.out.println("Usage : java com.facebook.infrastructure.tools.ThreadListBuilder <directory containing files to be processed> <directory to dump the bloom filter in.>");
+            System.exit(1);
+        }
+        
+        File directory = new File(args[0]);
+        File[] files = directory.listFiles();
+        List<DataOutputBuffer> buffers = new ArrayList<DataOutputBuffer>();    
+        BloomFilter bf = new BloomFilter(count_, 8);        
+        int keyCount = 0;
+        
+        /* Process the list of files. */
+        for ( File file : files )
+        {
+            System.out.println("Processing file " + file);
+            BufferedReader bufReader = new BufferedReader( new InputStreamReader( new FileInputStream(file) ), ThreadListBuilder.bufSize_ );
+            String line = null;
+            
+            while ( (line = bufReader.readLine()) != null )
+            {
+                /* After accumulating count_ keys reset the bloom filter. */
+                if ( keyCount > 0 && keyCount % count_ == 0 )
+                {                       
+                    DataOutputBuffer bufOut = new DataOutputBuffer();
+                    BloomFilter.serializer().serialize(bf, bufOut);
+                    System.out.println("Finished serializing the bloom filter");
+                    buffers.add(bufOut);
+                    bf = new BloomFilter(count_, 8);
+                }
+                line = line.trim();                
+                bf.fill(line);
+                ++keyCount;
+            }
+        }
+        
+        /* Add the bloom filter assuming the last one was left out */
+        DataOutputBuffer bufOut = new DataOutputBuffer();
+        BloomFilter.serializer().serialize(bf, bufOut);
+        buffers.add(bufOut);
+        
+        
+        int size = buffers.size();
+        for ( int i = 0; i < size; ++i )
+        {
+            DataOutputBuffer buffer = buffers.get(i);
+            String file = args[1] + System.getProperty("file.separator") + "Bloom-Filter-" + i + ".dat";
+            RandomAccessFile raf = new RandomAccessFile(file, "rw");
+            raf.write(buffer.getData(), 0, buffer.getLength());
+            raf.close();
+            buffer.close();
+        }
+        System.out.println("Done writing the bloom filter to disk");
+    }
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdateVerbHandler.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdateVerbHandler.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdateVerbHandler.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdateVerbHandler.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.util.*;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigInteger;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.io.DataInputBuffer;
+import org.apache.cassandra.net.EndPoint;
+import org.apache.cassandra.net.IVerbHandler;
+import org.apache.cassandra.net.Message;
+import org.apache.cassandra.net.MessagingService;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.tools.TokenUpdater.TokenInfoMessage;
+import org.apache.cassandra.utils.LogUtil;
+import org.apache.log4j.Logger;
+import org.apache.cassandra.io.*;
+import org.apache.cassandra.config.*;
+
+/**
+ * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
+ */
+
+public class TokenUpdateVerbHandler implements IVerbHandler
+{
+    private static Logger logger_ = Logger.getLogger(TokenUpdateVerbHandler.class);
+
+    public void doVerb(Message message)
+    {
+    	byte[] body = (byte[])message.getMessageBody()[0];
+        
+        try
+        {
+            DataInputBuffer bufIn = new DataInputBuffer();
+            bufIn.reset(body, body.length);
+            /* Deserialize to get the token for this endpoint. */
+            TokenUpdater.TokenInfoMessage tiMessage = TokenUpdater.TokenInfoMessage.serializer().deserialize(bufIn);
+            
+            BigInteger token = tiMessage.getToken();
+            logger_.info("Updating the token to [" + token + "]");
+            StorageService.instance().updateToken(token);
+            
+            /* Get the headers for this message */
+            Map<String, byte[]> headers = message.getHeaders();
+            headers.remove( StorageService.getLocalStorageEndPoint().getHost() );
+            logger_.debug("Number of nodes in the header " + headers.size());
+            Set<String> nodes = headers.keySet();
+            
+            for ( String node : nodes )
+            {            
+                logger_.debug("Processing node " + node);
+                byte[] bytes = headers.remove(node);
+                /* Send a message to this node to update its token to the one retreived. */
+                EndPoint target = new EndPoint(node, DatabaseDescriptor.getStoragePort());
+                token = new BigInteger(bytes);
+                
+                /* Reset the new TokenInfoMessage */
+                tiMessage = new TokenUpdater.TokenInfoMessage(target, token );
+                ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                DataOutputStream dos = new DataOutputStream(bos);
+                TokenInfoMessage.serializer().serialize(tiMessage, dos);
+                message.setMessageBody(new Object[]{bos.toByteArray()});
+                
+                logger_.debug("Sending a token update message to " + target + " to update it to " + token);
+                MessagingService.getMessagingInstance().sendOneWay(message, target);
+                break;
+            }                        
+        }
+    	catch( IOException ex )
+    	{
+    		logger_.debug(LogUtil.throwableToString(ex));
+    	}
+    }
+
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdater.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdater.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdater.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/tools/TokenUpdater.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+import java.math.BigInteger;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.cassandra.io.ICompactSerializer;
+import org.apache.cassandra.net.EndPoint;
+import org.apache.cassandra.net.Message;
+import org.apache.cassandra.net.MessagingService;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.FBUtilities;
+import org.apache.cassandra.io.*;
+import org.apache.cassandra.utils.*;
+
+public class TokenUpdater
+{
+    private static final int port_ = 7000;
+    private static final long waitTime_ = 10000;
+    
+    public static void main(String[] args) throws Throwable
+    {
+        if ( args.length != 3 )
+        {
+            System.out.println("Usage : java com.facebook.infrastructure.tools.TokenUpdater <ip:port> <token> <file containing node token info>");
+            System.exit(1);
+        }
+        
+        String ipPort = args[0];
+        String token = args[1];
+        String file = args[2];
+        
+        String[] ipPortPair = ipPort.split(":");
+        EndPoint target = new EndPoint(ipPortPair[0], Integer.valueOf(ipPortPair[1]));
+        TokenInfoMessage tiMessage = new TokenInfoMessage( target, new BigInteger(token) );
+        
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream dos = new DataOutputStream(bos);
+        TokenInfoMessage.serializer().serialize(tiMessage, dos);
+        /* Construct the token update message to be sent */
+        Message tokenUpdateMessage = new Message( new EndPoint(FBUtilities.getHostName(), port_), "", StorageService.tokenVerbHandler_, new Object[]{bos.toByteArray()} );
+        
+        BufferedReader bufReader = new BufferedReader( new InputStreamReader( new FileInputStream(file) ) );
+        String line = null;
+       
+        while ( ( line = bufReader.readLine() ) != null )
+        {
+            String[] nodeTokenPair = line.split(" ");
+            /* Add the node and the token pair into the header of this message. */
+            BigInteger nodeToken = new BigInteger(nodeTokenPair[1]);
+            tokenUpdateMessage.addHeader(nodeTokenPair[0], nodeToken.toByteArray());
+        }
+        
+        System.out.println("Sending a token update message to " + target);
+        MessagingService.getMessagingInstance().sendOneWay(tokenUpdateMessage, target);
+        Thread.sleep(TokenUpdater.waitTime_);
+        System.out.println("Done sending the update message");
+    }
+    
+    public static class TokenInfoMessage implements Serializable
+    {
+        private static ICompactSerializer<TokenInfoMessage> serializer_;
+        private static AtomicInteger idGen_ = new AtomicInteger(0);
+        
+        static
+        {
+            serializer_ = new TokenInfoMessageSerializer();            
+        }
+        
+        static ICompactSerializer<TokenInfoMessage> serializer()
+        {
+            return serializer_;
+        }
+
+        private EndPoint target_;
+        private BigInteger token_;
+        
+        TokenInfoMessage(EndPoint target, BigInteger token)
+        {
+            target_ = target;
+            token_ = token;
+        }
+        
+        EndPoint getTarget()
+        {
+            return target_;
+        }
+        
+        BigInteger getToken()
+        {
+            return token_;
+        }
+    }
+    
+    public static class TokenInfoMessageSerializer implements ICompactSerializer<TokenInfoMessage>
+    {
+        public void serialize(TokenInfoMessage tiMessage, DataOutputStream dos) throws IOException
+        {
+            byte[] node = EndPoint.toBytes( tiMessage.getTarget() );
+            dos.writeInt(node.length);
+            dos.write(node);
+            
+            byte[] token = tiMessage.getToken().toByteArray();
+            dos.writeInt( token.length );
+            dos.write(token);
+        }
+        
+        public TokenInfoMessage deserialize(DataInputStream dis) throws IOException
+        {
+            byte[] target = new byte[dis.readInt()];
+            dis.readFully(target);
+            
+            byte[] token = new byte[dis.readInt()];
+            dis.readFully(token);
+            
+            return new TokenInfoMessage(EndPoint.fromBytes(target), new BigInteger(token));
+        }
+    }
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/utils/BasicUtilities.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/utils/BasicUtilities.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/utils/BasicUtilities.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/utils/BasicUtilities.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.utils;
+
+import java.util.*;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor; 
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+
+/**
+ * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
+ */
+
+
+public class BasicUtilities
+{        
+	public static byte[] longToByteArray(long arg)
+	{      
+        byte[] retVal = new byte[8];
+        ByteBuffer bb= ByteBuffer.wrap(retVal);
+        bb.putLong(arg);
+        return retVal; 
+	 }
+	
+	public static long byteArrayToLong(byte[] arg)
+	{
+		ByteBuffer bb= ByteBuffer.wrap(arg);
+		return bb.getLong();
+	}
+	
+	public static byte[] intToByteArray(int arg)
+	{      
+        byte[] retVal = new byte[4];
+        ByteBuffer bb= ByteBuffer.wrap(retVal);
+        bb.putInt(arg);
+        return retVal; 
+	 }
+	
+	public static int byteArrayToInt(byte[] arg)
+	{
+		ByteBuffer bb= ByteBuffer.wrap(arg);
+		return bb.getInt();
+	}
+	
+	public static byte[] shortToByteArray(short arg)
+	{      
+        byte[] retVal = new byte[2];
+        ByteBuffer bb= ByteBuffer.wrap(retVal);
+        bb.putShort(arg);
+        return retVal; 
+	 }
+	
+	public static short byteArrayToShort(byte[] arg)
+	{
+		ByteBuffer bb= ByteBuffer.wrap(arg);
+		return bb.getShort();
+    }
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/utils/BitSet.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/utils/BitSet.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/utils/BitSet.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/utils/BitSet.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,1142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.utils;
+
+import java.io.*;
+import java.util.*;
+
+import org.apache.cassandra.io.ICompactSerializer;
+
+
+/**
+ * This class implements a vector of bits that grows as needed. Each component
+ * of the bit set has a <code>boolean</code> value. The bits of a
+ * <code>BitSet</code> are indexed by nonnegative integers. Individual indexed
+ * bits can be examined, set, or cleared. One <code>BitSet</code> may be used
+ * to modify the contents of another <code>BitSet</code> through logical AND,
+ * logical inclusive OR, and logical exclusive OR operations.
+ * <p>
+ * By default, all bits in the set initially have the value <code>false</code>.
+ * <p>
+ * Every bit set has a current size, which is the number of bits of space
+ * currently in use by the bit set. Note that the size is related to the
+ * implementation of a bit set, so it may change with implementation. The length
+ * of a bit set relates to logical length of a bit set and is defined
+ * independently of implementation.
+ * <p>
+ * Unless otherwise noted, passing a null parameter to any of the methods in a
+ * <code>BitSet</code> will result in a <code>NullPointerException</code>.
+ * 
+ * <p>
+ * A <code>BitSet</code> is not safe for multithreaded use without external
+ * synchronization.
+ * 
+ * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
+ */
+public class BitSet implements Cloneable, java.io.Serializable
+{
+    /*
+     * BitSets are packed into arrays of "words." Currently a word is a long,
+     * which consists of 64 bits, requiring 6 address bits. The choice of word
+     * size is determined purely by performance concerns.
+     */
+    private final static int ADDRESS_BITS_PER_WORD = 6;
+    private final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;
+    private final static int BIT_INDEX_MASK = BITS_PER_WORD - 1;
+    
+    /* Used to shift left or right for a partial word mask */
+    private static final long WORD_MASK = 0xffffffffffffffffL;
+    
+    /**
+     * @serialField bits long[]
+     * 
+     * The bits in this BitSet. The ith bit is stored in bits[i/64] at bit
+     * position i % 64 (where bit position 0 refers to the least significant bit
+     * and 63 refers to the most significant bit).
+     */
+    private static final ObjectStreamField[] serialPersistentFields = { new ObjectStreamField(
+            "bits", long[].class), };
+    private static ICompactSerializer<BitSet> serializer_ = new BitSetSerializer();
+    
+    static ICompactSerializer<BitSet> serializer()
+    {
+        return serializer_;
+    }
+    
+    /**
+     * The internal field corresponding to the serialField "bits".
+     */
+    private long[] words;
+    
+    /**
+     * The number of words in the logical size of this BitSet.
+     */
+    private int wordsInUse = 0;
+    
+    /**
+     * Whether the size of "words" is user-specified. If so, we assume the user
+     * knows what he's doing and try harder to preserve it.
+     */
+    private transient boolean sizeIsSticky = false;
+    
+    /* use serialVersionUID from JDK 1.0.2 for interoperability */
+    private static final long serialVersionUID = 7997698588986878753L;
+    
+    /**
+     * Given a bit index, return word index containing it.
+     */
+    private static int wordIndex(int bitIndex)
+    {
+        return bitIndex >> ADDRESS_BITS_PER_WORD;
+    }
+    
+    /**
+     * Every public method must preserve these invariants.
+     */
+    private void checkInvariants()
+    {
+        assert (wordsInUse == 0 || words[wordsInUse - 1] != 0);
+        assert (wordsInUse >= 0 && wordsInUse <= words.length);
+        assert (wordsInUse == words.length || words[wordsInUse] == 0);
+    }
+    
+    /**
+     * Set the field wordsInUse with the logical size in words of the bit set.
+     * WARNING:This method assumes that the number of words actually in use is
+     * less than or equal to the current value of wordsInUse!
+     */
+    private void recalculateWordsInUse()
+    {
+        // Traverse the bitset until a used word is found
+        int i;
+        for (i = wordsInUse - 1; i >= 0; i--)
+            if (words[i] != 0)
+                break;
+        
+        wordsInUse = i + 1; // The new logical size
+    }
+    
+    /**
+     * Creates a new bit set. All bits are initially <code>false</code>.
+     */
+    public BitSet()
+    {
+        initWords(BITS_PER_WORD);
+        sizeIsSticky = false;
+    }
+    
+    /**
+     * Creates a bit set whose initial size is large enough to explicitly
+     * represent bits with indices in the range <code>0</code> through
+     * <code>nbits-1</code>. All bits are initially <code>false</code>.
+     * 
+     * @param nbits
+     *            the initial size of the bit set.
+     * @exception NegativeArraySizeException
+     *                if the specified initial size is negative.
+     */
+    public BitSet(int nbits)
+    {
+        // nbits can't be negative; size 0 is OK
+        if (nbits < 0)
+            throw new NegativeArraySizeException("nbits < 0: " + nbits);
+        
+        initWords(nbits);
+        sizeIsSticky = true;
+    }
+    
+    /**
+     * This version is used only during deserialization.
+     * 
+     * @param words the array which we use to defreeze 
+     *              the BitSet.
+     */
+    BitSet(int wordsInUse, long[] words)
+    {
+        this.wordsInUse = wordsInUse;
+        this.words = words;
+        this.sizeIsSticky = true;
+    }
+    
+    int wordsInUse()
+    {
+        return this.wordsInUse;
+    }
+    
+    long[] words()
+    {
+        return this.words;
+    }
+    
+    private void initWords(int nbits)
+    {
+        words = new long[wordIndex(nbits - 1) + 1];
+    }
+    
+    /**
+     * Ensures that the BitSet can hold enough words.
+     * 
+     * @param wordsRequired
+     *            the minimum acceptable number of words.
+     */
+    private void ensureCapacity(int wordsRequired)
+    {
+        if (words.length < wordsRequired)
+        {
+            // Allocate larger of doubled size or required size
+            int request = Math.max(2 * words.length, wordsRequired);
+            words = Arrays.copyOf(words, request);
+            sizeIsSticky = false;
+        }
+    }
+    
+    /**
+     * Ensures that the BitSet can accommodate a given wordIndex, temporarily
+     * violating the invariants. The caller must restore the invariants before
+     * returning to the user, possibly using recalculateWordsInUse().
+     * 
+     * @param wordIndex
+     *            the index to be accommodated.
+     */
+    private void expandTo(int wordIndex)
+    {
+        int wordsRequired = wordIndex + 1;
+        if (wordsInUse < wordsRequired)
+        {
+            ensureCapacity(wordsRequired);
+            wordsInUse = wordsRequired;
+        }
+    }
+    
+    /**
+     * Checks that fromIndex ... toIndex is a valid range of bit indices.
+     */
+    private static void checkRange(int fromIndex, int toIndex)
+    {
+        if (fromIndex < 0)
+            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
+        if (toIndex < 0)
+            throw new IndexOutOfBoundsException("toIndex < 0: " + toIndex);
+        if (fromIndex > toIndex)
+            throw new IndexOutOfBoundsException("fromIndex: " + fromIndex
+                    + " > toIndex: " + toIndex);
+    }
+    
+    /**
+     * Sets the bit at the specified index to the complement of its current
+     * value.
+     * 
+     * @param bitIndex
+     *            the index of the bit to flip.
+     * @exception IndexOutOfBoundsException
+     *                if the specified index is negative.
+     * @since 1.4
+     */
+    public void flip(int bitIndex)
+    {
+        if (bitIndex < 0)
+            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
+        
+        int wordIndex = wordIndex(bitIndex);
+        expandTo(wordIndex);
+        
+        words[wordIndex] ^= (1L << bitIndex);
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Sets each bit from the specified <tt>fromIndex</tt> (inclusive) to the
+     * specified <tt>toIndex</tt> (exclusive) to the complement of its current
+     * value.
+     * 
+     * @param fromIndex
+     *            index of the first bit to flip.
+     * @param toIndex
+     *            index after the last bit to flip.
+     * @exception IndexOutOfBoundsException
+     *                if <tt>fromIndex</tt> is negative, or <tt>toIndex</tt>
+     *                is negative, or <tt>fromIndex</tt> is larger than
+     *                <tt>toIndex</tt>.
+     * @since 1.4
+     */
+    public void flip(int fromIndex, int toIndex)
+    {
+        checkRange(fromIndex, toIndex);
+        
+        if (fromIndex == toIndex)
+            return;
+        
+        int startWordIndex = wordIndex(fromIndex);
+        int endWordIndex = wordIndex(toIndex - 1);
+        expandTo(endWordIndex);
+        
+        long firstWordMask = WORD_MASK << fromIndex;
+        long lastWordMask = WORD_MASK >>> -toIndex;
+        if (startWordIndex == endWordIndex)
+        {
+            // Case 1: One word
+            words[startWordIndex] ^= (firstWordMask & lastWordMask);
+        }
+        else
+        {
+            // Case 2: Multiple words
+            // Handle first word
+            words[startWordIndex] ^= firstWordMask;
+            
+            // Handle intermediate words, if any
+            for (int i = startWordIndex + 1; i < endWordIndex; i++)
+                words[i] ^= WORD_MASK;
+            
+            // Handle last word
+            words[endWordIndex] ^= lastWordMask;
+        }
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Sets the bit at the specified index to <code>true</code>.
+     * 
+     * @param bitIndex
+     *            a bit index.
+     * @exception IndexOutOfBoundsException
+     *                if the specified index is negative.
+     * @since JDK1.0
+     */
+    public void set(int bitIndex)
+    {
+        if (bitIndex < 0)
+            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
+        
+        int wordIndex = wordIndex(bitIndex);
+        expandTo(wordIndex);
+        
+        words[wordIndex] |= (1L << bitIndex); // Restores invariants
+        
+        checkInvariants();
+    }
+    
+    /**
+     * Sets the bit at the specified index to the specified value.
+     * 
+     * @param bitIndex
+     *            a bit index.
+     * @param value
+     *            a boolean value to set.
+     * @exception IndexOutOfBoundsException
+     *                if the specified index is negative.
+     * @since 1.4
+     */
+    public void set(int bitIndex, boolean value)
+    {
+        if (value)
+            set(bitIndex);
+        else
+            clear(bitIndex);
+    }
+    
+    /**
+     * Sets the bits from the specified <tt>fromIndex</tt> (inclusive) to the
+     * specified <tt>toIndex</tt> (exclusive) to <code>true</code>.
+     * 
+     * @param fromIndex
+     *            index of the first bit to be set.
+     * @param toIndex
+     *            index after the last bit to be set.
+     * @exception IndexOutOfBoundsException
+     *                if <tt>fromIndex</tt> is negative, or <tt>toIndex</tt>
+     *                is negative, or <tt>fromIndex</tt> is larger than
+     *                <tt>toIndex</tt>.
+     * @since 1.4
+     */
+    public void set(int fromIndex, int toIndex)
+    {
+        checkRange(fromIndex, toIndex);
+        
+        if (fromIndex == toIndex)
+            return;
+        
+        // Increase capacity if necessary
+        int startWordIndex = wordIndex(fromIndex);
+        int endWordIndex = wordIndex(toIndex - 1);
+        expandTo(endWordIndex);
+        
+        long firstWordMask = WORD_MASK << fromIndex;
+        long lastWordMask = WORD_MASK >>> -toIndex;
+        if (startWordIndex == endWordIndex)
+        {
+            // Case 1: One word
+            words[startWordIndex] |= (firstWordMask & lastWordMask);
+        }
+        else
+        {
+            // Case 2: Multiple words
+            // Handle first word
+            words[startWordIndex] |= firstWordMask;
+            
+            // Handle intermediate words, if any
+            for (int i = startWordIndex + 1; i < endWordIndex; i++)
+                words[i] = WORD_MASK;
+            
+            // Handle last word (restores invariants)
+            words[endWordIndex] |= lastWordMask;
+        }
+        
+        checkInvariants();
+    }
+    
+    /**
+     * Sets the bits from the specified <tt>fromIndex</tt> (inclusive) to the
+     * specified <tt>toIndex</tt> (exclusive) to the specified value.
+     * 
+     * @param fromIndex
+     *            index of the first bit to be set.
+     * @param toIndex
+     *            index after the last bit to be set
+     * @param value
+     *            value to set the selected bits to
+     * @exception IndexOutOfBoundsException
+     *                if <tt>fromIndex</tt> is negative, or <tt>toIndex</tt>
+     *                is negative, or <tt>fromIndex</tt> is larger than
+     *                <tt>toIndex</tt>.
+     * @since 1.4
+     */
+    public void set(int fromIndex, int toIndex, boolean value)
+    {
+        if (value)
+            set(fromIndex, toIndex);
+        else
+            clear(fromIndex, toIndex);
+    }
+    
+    /**
+     * Sets the bit specified by the index to <code>false</code>.
+     * 
+     * @param bitIndex
+     *            the index of the bit to be cleared.
+     * @exception IndexOutOfBoundsException
+     *                if the specified index is negative.
+     * @since JDK1.0
+     */
+    public void clear(int bitIndex)
+    {
+        if (bitIndex < 0)
+            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
+        
+        int wordIndex = wordIndex(bitIndex);
+        if (wordIndex >= wordsInUse)
+            return;
+        
+        words[wordIndex] &= ~(1L << bitIndex);
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Sets the bits from the specified <tt>fromIndex</tt> (inclusive) to the
+     * specified <tt>toIndex</tt> (exclusive) to <code>false</code>.
+     * 
+     * @param fromIndex
+     *            index of the first bit to be cleared.
+     * @param toIndex
+     *            index after the last bit to be cleared.
+     * @exception IndexOutOfBoundsException
+     *                if <tt>fromIndex</tt> is negative, or <tt>toIndex</tt>
+     *                is negative, or <tt>fromIndex</tt> is larger than
+     *                <tt>toIndex</tt>.
+     * @since 1.4
+     */
+    public void clear(int fromIndex, int toIndex)
+    {
+        checkRange(fromIndex, toIndex);
+        
+        if (fromIndex == toIndex)
+            return;
+        
+        int startWordIndex = wordIndex(fromIndex);
+        if (startWordIndex >= wordsInUse)
+            return;
+        
+        int endWordIndex = wordIndex(toIndex - 1);
+        if (endWordIndex >= wordsInUse)
+        {
+            toIndex = length();
+            endWordIndex = wordsInUse - 1;
+        }
+        
+        long firstWordMask = WORD_MASK << fromIndex;
+        long lastWordMask = WORD_MASK >>> -toIndex;
+        if (startWordIndex == endWordIndex)
+        {
+            // Case 1: One word
+            words[startWordIndex] &= ~(firstWordMask & lastWordMask);
+        }
+        else
+        {
+            // Case 2: Multiple words
+            // Handle first word
+            words[startWordIndex] &= ~firstWordMask;
+            
+            // Handle intermediate words, if any
+            for (int i = startWordIndex + 1; i < endWordIndex; i++)
+                words[i] = 0;
+            
+            // Handle last word
+            words[endWordIndex] &= ~lastWordMask;
+        }
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Sets all of the bits in this BitSet to <code>false</code>.
+     * 
+     * @since 1.4
+     */
+    public void clear()
+    {
+        while (wordsInUse > 0)
+            words[--wordsInUse] = 0;
+    }
+    
+    /**
+     * Returns the value of the bit with the specified index. The value is
+     * <code>true</code> if the bit with the index <code>bitIndex</code> is
+     * currently set in this <code>BitSet</code>; otherwise, the result is
+     * <code>false</code>.
+     * 
+     * @param bitIndex
+     *            the bit index.
+     * @return the value of the bit with the specified index.
+     * @exception IndexOutOfBoundsException
+     *                if the specified index is negative.
+     */
+    public boolean get(int bitIndex)
+    {
+        if (bitIndex < 0)
+            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
+        
+        checkInvariants();
+        
+        int wordIndex = wordIndex(bitIndex);
+        return (wordIndex < wordsInUse)
+        && ((words[wordIndex] & (1L << bitIndex)) != 0);
+    }
+    
+    /**
+     * Returns a new <tt>BitSet</tt> composed of bits from this
+     * <tt>BitSet</tt> from <tt>fromIndex</tt> (inclusive) to
+     * <tt>toIndex</tt> (exclusive).
+     * 
+     * @param fromIndex
+     *            index of the first bit to include.
+     * @param toIndex
+     *            index after the last bit to include.
+     * @return a new <tt>BitSet</tt> from a range of this <tt>BitSet</tt>.
+     * @exception IndexOutOfBoundsException
+     *                if <tt>fromIndex</tt> is negative, or <tt>toIndex</tt>
+     *                is negative, or <tt>fromIndex</tt> is larger than
+     *                <tt>toIndex</tt>.
+     * @since 1.4
+     */
+    public BitSet get(int fromIndex, int toIndex)
+    {
+        checkRange(fromIndex, toIndex);
+        
+        checkInvariants();
+        
+        int len = length();
+        
+        // If no set bits in range return empty bitset
+        if (len <= fromIndex || fromIndex == toIndex)
+            return new BitSet(0);
+        
+        // An optimization
+        if (toIndex > len)
+            toIndex = len;
+        
+        BitSet result = new BitSet(toIndex - fromIndex);
+        int targetWords = wordIndex(toIndex - fromIndex - 1) + 1;
+        int sourceIndex = wordIndex(fromIndex);
+        boolean wordAligned = ((fromIndex & BIT_INDEX_MASK) == 0);
+        
+        // Process all words but the last word
+        for (int i = 0; i < targetWords - 1; i++, sourceIndex++)
+            result.words[i] = wordAligned ? words[sourceIndex]
+                                                  : (words[sourceIndex] >>> fromIndex)
+                                                  | (words[sourceIndex + 1] << -fromIndex);
+            
+            // Process the last word
+            long lastWordMask = WORD_MASK >>> -toIndex;
+                                                  result.words[targetWords - 1] = ((toIndex - 1) & BIT_INDEX_MASK) < (fromIndex & BIT_INDEX_MASK) ? /*
+                                                  * straddles
+                                                  * source
+                                                  * words
+                                                  */
+                                                          ((words[sourceIndex] >>> fromIndex) | (words[sourceIndex + 1] & lastWordMask) << -fromIndex)
+                                                          : ((words[sourceIndex] & lastWordMask) >>> fromIndex);
+                                                          
+                                                          // Set wordsInUse correctly
+                                                          result.wordsInUse = targetWords;
+                                                          result.recalculateWordsInUse();
+                                                          result.checkInvariants();
+                                                          
+                                                          return result;
+    }
+    
+    /**
+     * Returns the index of the first bit that is set to <code>true</code>
+     * that occurs on or after the specified starting index. If no such bit
+     * exists then -1 is returned.
+     * 
+     * To iterate over the <code>true</code> bits in a <code>BitSet</code>,
+     * use the following loop:
+     * 
+     * <pre>
+     * for (int i = bs.nextSetBit(0); i &gt;= 0; i = bs.nextSetBit(i + 1))
+     * {
+     *     // operate on index i here
+     * }
+     * </pre>
+     * 
+     * @param fromIndex
+     *            the index to start checking from (inclusive).
+     * @return the index of the next set bit.
+     * @throws IndexOutOfBoundsException
+     *             if the specified index is negative.
+     * @since 1.4
+     */
+    public int nextSetBit(int fromIndex)
+    {
+        if (fromIndex < 0)
+            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
+        
+        checkInvariants();
+        
+        int u = wordIndex(fromIndex);
+        if (u >= wordsInUse)
+            return -1;
+        
+        long word = words[u] & (WORD_MASK << fromIndex);
+        
+        while (true)
+        {
+            if (word != 0)
+                return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);
+            if (++u == wordsInUse)
+                return -1;
+            word = words[u];
+        }
+    }
+    
+    /**
+     * Returns the index of the first bit that is set to <code>false</code>
+     * that occurs on or after the specified starting index.
+     * 
+     * @param fromIndex
+     *            the index to start checking from (inclusive).
+     * @return the index of the next clear bit.
+     * @throws IndexOutOfBoundsException
+     *             if the specified index is negative.
+     * @since 1.4
+     */
+    public int nextClearBit(int fromIndex)
+    {
+        // Neither spec nor implementation handle bitsets of maximal length.
+        // See 4816253.
+        if (fromIndex < 0)
+            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
+        
+        checkInvariants();
+        
+        int u = wordIndex(fromIndex);
+        if (u >= wordsInUse)
+            return fromIndex;
+        
+        long word = ~words[u] & (WORD_MASK << fromIndex);
+        
+        while (true)
+        {
+            if (word != 0)
+                return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);
+            if (++u == wordsInUse)
+                return wordsInUse * BITS_PER_WORD;
+            word = ~words[u];
+        }
+    }
+    
+    /**
+     * Returns the "logical size" of this <code>BitSet</code>: the index of
+     * the highest set bit in the <code>BitSet</code> plus one. Returns zero
+     * if the <code>BitSet</code> contains no set bits.
+     * 
+     * @return the logical size of this <code>BitSet</code>.
+     * @since 1.2
+     */
+    public int length()
+    {
+        if (wordsInUse == 0)
+            return 0;
+        
+        return BITS_PER_WORD
+        * (wordsInUse - 1)
+        + (BITS_PER_WORD - Long
+                .numberOfLeadingZeros(words[wordsInUse - 1]));
+    }
+    
+    /**
+     * Returns true if this <code>BitSet</code> contains no bits that are set
+     * to <code>true</code>.
+     * 
+     * @return boolean indicating whether this <code>BitSet</code> is empty.
+     * @since 1.4
+     */
+    public boolean isEmpty()
+    {
+        return wordsInUse == 0;
+    }
+    
+    /**
+     * Returns true if the specified <code>BitSet</code> has any bits set to
+     * <code>true</code> that are also set to <code>true</code> in this
+     * <code>BitSet</code>.
+     * 
+     * @param set
+     *            <code>BitSet</code> to intersect with
+     * @return boolean indicating whether this <code>BitSet</code> intersects
+     *         the specified <code>BitSet</code>.
+     * @since 1.4
+     */
+    public boolean intersects(BitSet set)
+    {
+        for (int i = Math.min(wordsInUse, set.wordsInUse) - 1; i >= 0; i--)
+            if ((words[i] & set.words[i]) != 0)
+                return true;
+        return false;
+    }
+    
+    /**
+     * Returns the number of bits set to <tt>true</tt> in this
+     * <code>BitSet</code>.
+     * 
+     * @return the number of bits set to <tt>true</tt> in this
+     *         <code>BitSet</code>.
+     * @since 1.4
+     */
+    public int cardinality()
+    {
+        int sum = 0;
+        for (int i = 0; i < wordsInUse; i++)
+            sum += Long.bitCount(words[i]);
+        return sum;
+    }
+    
+    /**
+     * Performs a logical <b>AND</b> of this target bit set with the argument
+     * bit set. This bit set is modified so that each bit in it has the value
+     * <code>true</code> if and only if it both initially had the value
+     * <code>true</code> and the corresponding bit in the bit set argument
+     * also had the value <code>true</code>.
+     * 
+     * @param set
+     *            a bit set.
+     */
+    public void and(BitSet set)
+    {
+        if (this == set)
+            return;
+        
+        while (wordsInUse > set.wordsInUse)
+            words[--wordsInUse] = 0;
+        
+        // Perform logical AND on words in common
+        for (int i = 0; i < wordsInUse; i++)
+            words[i] &= set.words[i];
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Performs a logical <b>OR</b> of this bit set with the bit set argument.
+     * This bit set is modified so that a bit in it has the value
+     * <code>true</code> if and only if it either already had the value
+     * <code>true</code> or the corresponding bit in the bit set argument has
+     * the value <code>true</code>.
+     * 
+     * @param set
+     *            a bit set.
+     */
+    public void or(BitSet set)
+    {
+        if (this == set)
+            return;
+        
+        int wordsInCommon = Math.min(wordsInUse, set.wordsInUse);
+        
+        if (wordsInUse < set.wordsInUse)
+        {
+            ensureCapacity(set.wordsInUse);
+            wordsInUse = set.wordsInUse;
+        }
+        
+        // Perform logical OR on words in common
+        for (int i = 0; i < wordsInCommon; i++)
+            words[i] |= set.words[i];
+        
+        // Copy any remaining words
+        if (wordsInCommon < set.wordsInUse)
+            System.arraycopy(set.words, wordsInCommon, words, wordsInCommon,
+                    wordsInUse - wordsInCommon);
+        
+        // recalculateWordsInUse() is unnecessary
+        checkInvariants();
+    }
+    
+    /**
+     * Performs a logical <b>XOR</b> of this bit set with the bit set argument.
+     * This bit set is modified so that a bit in it has the value
+     * <code>true</code> if and only if one of the following statements holds:
+     * <ul>
+     * <li>The bit initially has the value <code>true</code>, and the
+     * corresponding bit in the argument has the value <code>false</code>.
+     * <li>The bit initially has the value <code>false</code>, and the
+     * corresponding bit in the argument has the value <code>true</code>.
+     * </ul>
+     * 
+     * @param set
+     *            a bit set.
+     */
+    public void xor(BitSet set)
+    {
+        int wordsInCommon = Math.min(wordsInUse, set.wordsInUse);
+        
+        if (wordsInUse < set.wordsInUse)
+        {
+            ensureCapacity(set.wordsInUse);
+            wordsInUse = set.wordsInUse;
+        }
+        
+        // Perform logical XOR on words in common
+        for (int i = 0; i < wordsInCommon; i++)
+            words[i] ^= set.words[i];
+        
+        // Copy any remaining words
+        if (wordsInCommon < set.wordsInUse)
+            System.arraycopy(set.words, wordsInCommon, words, wordsInCommon,
+                    set.wordsInUse - wordsInCommon);
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Clears all of the bits in this <code>BitSet</code> whose corresponding
+     * bit is set in the specified <code>BitSet</code>.
+     * 
+     * @param set
+     *            the <code>BitSet</code> with which to mask this
+     *            <code>BitSet</code>.
+     * @since 1.2
+     */
+    public void andNot(BitSet set)
+    {
+        // Perform logical (a & !b) on words in common
+        for (int i = Math.min(wordsInUse, set.wordsInUse) - 1; i >= 0; i--)
+            words[i] &= ~set.words[i];
+        
+        recalculateWordsInUse();
+        checkInvariants();
+    }
+    
+    /**
+     * Returns a hash code value for this bit set. The hash code depends only on
+     * which bits have been set within this <code>BitSet</code>. The
+     * algorithm used to compute it may be described as follows.
+     * <p>
+     * Suppose the bits in the <code>BitSet</code> were to be stored in an
+     * array of <code>long</code> integers called, say, <code>words</code>,
+     * in such a manner that bit <code>k</code> is set in the
+     * <code>BitSet</code> (for nonnegative values of <code>k</code>) if
+     * and only if the expression
+     * 
+     * <pre>
+     * ((k &gt;&gt; 6) &lt; words.length) &amp;&amp; ((words[k &gt;&gt; 6] &amp; (1L &lt;&lt; (bit &amp; 0x3F))) != 0)
+     * </pre>
+     * 
+     * is true. Then the following definition of the <code>hashCode</code>
+     * method would be a correct implementation of the actual algorithm:
+     * 
+     * <pre>
+     * public int hashCode()
+     * {
+     *     long h = 1234;
+     *     for (int i = words.length; --i &gt;= 0;)
+     *     {
+     *         h &circ;= words[i] * (i + 1);
+     *     }
+     *     return (int) ((h &gt;&gt; 32) &circ; h);
+     * }
+     * </pre>
+     * 
+     * Note that the hash code values change if the set of bits is altered.
+     * <p>
+     * Overrides the <code>hashCode</code> method of <code>Object</code>.
+     * 
+     * @return a hash code value for this bit set.
+     */
+    public int hashCode()
+    {
+        long h = 1234;
+        for (int i = wordsInUse; --i >= 0;)
+            h ^= words[i] * (i + 1);
+        
+        return (int) ((h >> 32) ^ h);
+    }
+    
+    /**
+     * Returns the number of bits of space actually in use by this
+     * <code>BitSet</code> to represent bit values. The maximum element in the
+     * set is the size - 1st element.
+     * 
+     * @return the number of bits currently in this bit set.
+     */
+    public int size()
+    {
+        return words.length * BITS_PER_WORD;
+    }
+    
+    /**
+     * Compares this object against the specified object. The result is
+     * <code>true</code> if and only if the argument is not <code>null</code>
+     * and is a <code>Bitset</code> object that has exactly the same set of
+     * bits set to <code>true</code> as this bit set. That is, for every
+     * nonnegative <code>int</code> index <code>k</code>,
+     * 
+     * <pre>
+     * ((BitSet) obj).get(k) == this.get(k)
+     * </pre>
+     * 
+     * must be true. The current sizes of the two bit sets are not compared.
+     * <p>
+     * Overrides the <code>equals</code> method of <code>Object</code>.
+     * 
+     * @param obj
+     *            the object to compare with.
+     * @return <code>true</code> if the objects are the same;
+     *         <code>false</code> otherwise.
+     * @see java.util.BitSet#size()
+     */
+    public boolean equals(Object obj)
+    {
+        if (!(obj instanceof BitSet))
+            return false;
+        if (this == obj)
+            return true;
+        
+        BitSet set = (BitSet) obj;
+        
+        checkInvariants();
+        set.checkInvariants();
+        
+        if (wordsInUse != set.wordsInUse)
+            return false;
+        
+        // Check words in use by both BitSets
+        for (int i = 0; i < wordsInUse; i++)
+            if (words[i] != set.words[i])
+                return false;
+        
+        return true;
+    }
+    
+    /**
+     * Cloning this <code>BitSet</code> produces a new <code>BitSet</code>
+     * that is equal to it. The clone of the bit set is another bit set that has
+     * exactly the same bits set to <code>true</code> as this bit set.
+     * 
+     * <p>
+     * Overrides the <code>clone</code> method of <code>Object</code>.
+     * 
+     * @return a clone of this bit set.
+     * @see java.util.BitSet#size()
+     */
+    public Object clone()
+    {
+        if (!sizeIsSticky)
+            trimToSize();
+        
+        try
+        {
+            BitSet result = (BitSet) super.clone();
+            result.words = words.clone();
+            result.checkInvariants();
+            return result;
+        }
+        catch (CloneNotSupportedException e)
+        {
+            throw new InternalError();
+        }
+    }
+    
+    /**
+     * Attempts to reduce internal storage used for the bits in this bit set.
+     * Calling this method may, but is not required to, affect the value
+     * returned by a subsequent call to the {@link #size()} method.
+     */
+    private void trimToSize()
+    {
+        if (wordsInUse != words.length)
+        {
+            words = Arrays.copyOf(words, wordsInUse);
+            checkInvariants();
+        }
+    }
+    
+    /**
+     * Save the state of the <tt>BitSet</tt> instance to a stream (i.e.,
+     * serialize it).
+     */
+    private void writeObject(ObjectOutputStream s) throws IOException
+    {
+        
+        checkInvariants();
+        
+        if (!sizeIsSticky)
+            trimToSize();
+        
+        ObjectOutputStream.PutField fields = s.putFields();
+        fields.put("bits", words);
+        s.writeFields();
+    }
+    
+    /**
+     * Reconstitute the <tt>BitSet</tt> instance from a stream (i.e.,
+     * deserialize it).
+     */
+    private void readObject(ObjectInputStream s) throws IOException,
+    ClassNotFoundException
+    {
+        
+        ObjectInputStream.GetField fields = s.readFields();
+        words = (long[]) fields.get("bits", null);
+        
+        // Assume maximum length then find real length
+        // because recalculateWordsInUse assumes maintenance
+        // or reduction in logical size
+        wordsInUse = words.length;
+        recalculateWordsInUse();
+        sizeIsSticky = (words.length > 0 && words[words.length - 1] == 0L); // heuristic
+        checkInvariants();
+    }
+    
+    /**
+     * Returns a string representation of this bit set. For every index for
+     * which this <code>BitSet</code> contains a bit in the set state, the
+     * decimal representation of that index is included in the result. Such
+     * indices are listed in order from lowest to highest, separated by
+     * ",&nbsp;" (a comma and a space) and surrounded by braces, resulting in
+     * the usual mathematical notation for a set of integers.
+     * <p>
+     * Overrides the <code>toString</code> method of <code>Object</code>.
+     * <p>
+     * Example:
+     * 
+     * <pre>
+     * BitSet drPepper = new BitSet();
+     * </pre>
+     * 
+     * Now <code>drPepper.toString()</code> returns "<code>{}</code>".
+     * <p>
+     * 
+     * <pre>
+     * drPepper.set(2);
+     * </pre>
+     * 
+     * Now <code>drPepper.toString()</code> returns "<code>{2}</code>".
+     * <p>
+     * 
+     * <pre>
+     * drPepper.set(4);
+     * drPepper.set(10);
+     * </pre>
+     * 
+     * Now <code>drPepper.toString()</code> returns "<code>{2, 4, 10}</code>".
+     * 
+     * @return a string representation of this bit set.
+     */
+    public String toString()
+    {
+        checkInvariants();
+        
+        int numBits = (wordsInUse > 128) ? cardinality() : wordsInUse
+                * BITS_PER_WORD;
+        StringBuilder b = new StringBuilder(6 * numBits + 2);
+        b.append('{');
+        
+        int i = nextSetBit(0);
+        if (i != -1)
+        {
+            b.append(i);
+            for (i = nextSetBit(i + 1); i >= 0; i = nextSetBit(i + 1))
+            {
+                int endOfRun = nextClearBit(i);
+                do
+                {
+                    b.append(", ").append(i);
+                }
+                while (++i < endOfRun);
+            }
+        }
+        
+        b.append('}');
+        return b.toString();
+    }
+}
+
+class BitSetSerializer implements ICompactSerializer<BitSet>
+{
+    public void serialize(BitSet bs, DataOutputStream dos) throws IOException
+    {
+        dos.writeInt(bs.wordsInUse());
+        long[] words = bs.words();
+        dos.writeInt(words.length);
+        for ( int i = 0; i < words.length; ++i )
+        {            
+            dos.writeLong(words[i]);
+        }
+    }
+    
+    public BitSet deserialize(DataInputStream dis) throws IOException
+    {
+        int wordsInUse = dis.readInt();
+        int size = dis.readInt();
+        long[] words = new long[size];
+        for ( int i = 0; i < size; ++i )
+        {
+            words[i] = dis.readLong();            
+        }
+        return new BitSet(wordsInUse, words);
+    }
+}

Added: incubator/cassandra/trunk/src/org/apache/cassandra/utils/BloomCalculations.java
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/org/apache/cassandra/utils/BloomCalculations.java?rev=749218&view=auto
==============================================================================
--- incubator/cassandra/trunk/src/org/apache/cassandra/utils/BloomCalculations.java (added)
+++ incubator/cassandra/trunk/src/org/apache/cassandra/utils/BloomCalculations.java Mon Mar  2 07:57:22 2009
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.utils;
+
+/**
+ * The following calculations are taken from:
+ * http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+ * "Bloom Filters - the math"
+ *
+ * This class's static methods are meant to facilitate the use of the Bloom
+ * Filter class by helping to choose correct values of 'bits per element' and
+ * 'number of hash functions, k'.
+ * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
+ */
+public class BloomCalculations {
+
+    private static final int maxBits = 15;
+    private static final int minBits = 2;
+    private static final int minK = 1;
+    private static final int maxK = 8;
+    private static final int[] optKPerBits =
+            new int[]{1, // dummy K for 0 bits per element
+                      1, // dummy K for 1 bits per element
+                      1, 2, 3, 3, 4, 5, 5, 6, 7, 8, 8, 8, 8, 8};
+
+    /**
+     * In the following table, the row 'i' shows false positive rates if i bits
+     * per element are used.  Column 'j' shows false positive rates if j hash
+     * functions are used.  The first row is 'i=0', the first column is 'j=0'.
+     * Each cell (i,j) the false positive rate determined by using i bits per
+     * element and j hash functions.
+     */
+    private static final double[][] probs = new double[][]{
+        {1.0}, // dummy row representing 0 bits per element
+        {1.0, 1.0}, // dummy row representing 1 bits per element
+        {1.0, 0.393,  0.400},
+        {1.0, 0.283,  0.237,  0.253},
+        {1.0, 0.221,  0.155,  0.147,   0.160},
+        {1.0, 0.181,  0.109,  0.092,   0.092,   0.101},
+        {1.0, 0.154,  0.0804, 0.0609,  0.0561,  0.0578,  0.0638},
+        {1.0, 0.133,  0.0618, 0.0423,  0.0359,  0.0347,  0.0364},
+        {1.0, 0.118,  0.0489, 0.0306,  0.024,   0.0217,  0.0216,  0.0229},
+        {1.0, 0.105,  0.0397, 0.0228,  0.0166,  0.0141,  0.0133,  0.0135,  0.0145},
+        {1.0, 0.0952, 0.0329, 0.0174,  0.0118,  0.00943, 0.00844, 0.00819, 0.00846},
+        {1.0, 0.0869, 0.0276, 0.0136,  0.00864, 0.0065,  0.00552, 0.00513, 0.00509},
+        {1.0, 0.08,   0.0236, 0.0108,  0.00646, 0.00459, 0.00371, 0.00329, 0.00314},
+        {1.0, 0.074,  0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199},
+        {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129},
+        {1.0, 0.0645, 0.0156, 0.00596, 0.003,   0.00183, 0.00128, 0.001,   0.000852}
+    };  // the first column is a dummy column representing K=0.
+
+    public static double getFailureRate(int bitsPerElement){
+        int k = computeBestK(bitsPerElement);
+        if(bitsPerElement >= probs.length) bitsPerElement = probs.length-1;
+        return probs[bitsPerElement][k];
+    }
+    
+    /**
+     * Given the number of bits that can be used per element, return the optimal
+     * number of hash functions in order to minimize the false positive rate.
+     *
+     * @param bitsPerElement
+     * @return The number of hash functions that minimize the false positive rate.
+     */
+    public static int computeBestK(int bitsPerElement){
+        if(bitsPerElement < 0)
+            return optKPerBits[0];
+        if(bitsPerElement >= optKPerBits.length)
+            return optKPerBits[optKPerBits.length-1];
+        return optKPerBits[bitsPerElement];
+    }
+
+    /**
+     * A wrapper class that holds two key parameters for a Bloom Filter: the
+     * number of hash functions used, and the number of bits per element used.
+     */
+    public static class BloomSpecification {
+        int K; // number of hash functions.
+        int bitsPerElement;
+    }
+
+    /**
+     * Given a maximum tolerable false positive probability, compute a Bloom
+     * specification which will give less than the specified false positive rate,
+     * but minimize the number of bits per element and the number of hash
+     * functions used.  Because bandwidth (and therefore total bitvector size)
+     * is considered more expensive than computing power, preference is given
+     * to minimizing bits per element rather than number of hash funtions.
+     *
+     * @param maxFalsePosProb The maximum tolerable false positive rate.
+     * @return A Bloom Specification which would result in a false positive rate
+     * less than specified by the function call.
+     */
+    public static BloomSpecification computeBitsAndK(double maxFalsePosProb){
+        BloomSpecification spec = new BloomSpecification();
+        spec.bitsPerElement = 2;
+        spec.K = optKPerBits[spec.bitsPerElement];
+
+        // Handle the trivial cases:
+        if(maxFalsePosProb >= probs[minBits][minK]) return spec;
+        if(maxFalsePosProb < probs[maxBits][maxK]) {
+            spec.bitsPerElement = maxBits;
+            spec.K = maxK;
+            return spec;
+        }
+
+        // First find the minimal required number of bits:
+        while(probs[spec.bitsPerElement][spec.K] > maxFalsePosProb){
+            spec.bitsPerElement++;
+            spec.K = optKPerBits[spec.bitsPerElement];
+        }
+        // Now that the number of bits is sufficient, see if we can relax K
+        // without losing too much precision.
+        while(probs[spec.bitsPerElement][spec.K-1] <= maxFalsePosProb){
+            spec.K--;
+        }
+        return spec;
+    }
+}



Mime
View raw message