cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xe...@apache.org
Subject git commit: update MurmurHash to version 3 patch by Vijay; reviewed by Pavel Yaskevich for CASSANDRA-2975
Date Fri, 16 Mar 2012 23:22:18 GMT
Updated Branches:
  refs/heads/trunk 5ed172053 -> d765b246e


update MurmurHash to version 3
patch by Vijay; reviewed by Pavel Yaskevich for CASSANDRA-2975


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d765b246
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d765b246
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d765b246

Branch: refs/heads/trunk
Commit: d765b246e72e7bb455e9ec9440ae735b1ee8543f
Parents: 5ed1720
Author: Pavel Yaskevich <xedin@apache.org>
Authored: Sat Mar 17 01:52:06 2012 +0300
Committer: Pavel Yaskevich <xedin@apache.org>
Committed: Sat Mar 17 01:56:58 2012 +0300

----------------------------------------------------------------------
 CHANGES.txt                                        |    1 +
 src/java/org/apache/cassandra/db/ColumnIndex.java  |   15 +-
 .../org/apache/cassandra/db/RowIndexEntry.java     |   22 +--
 .../db/columniterator/SSTableNamesIterator.java    |    2 +-
 .../apache/cassandra/io/sstable/Descriptor.java    |   12 +-
 .../apache/cassandra/io/sstable/IndexHelper.java   |   12 +-
 .../io/sstable/SSTableIdentityIterator.java        |    2 +-
 .../apache/cassandra/io/sstable/SSTableReader.java |   16 +--
 .../apache/cassandra/io/sstable/SSTableWriter.java |    9 +-
 .../org/apache/cassandra/utils/BloomFilter.java    |   75 ++------
 .../cassandra/utils/BloomFilterSerializer.java     |    6 +-
 src/java/org/apache/cassandra/utils/Filter.java    |    2 +
 .../org/apache/cassandra/utils/FilterFactory.java  |  147 +++++++++++++++
 .../apache/cassandra/utils/LegacyBloomFilter.java  |    7 +-
 .../utils/LegacyBloomFilterSerializer.java         |   11 +-
 .../apache/cassandra/utils/Murmur2BloomFilter.java |   53 +++++
 .../apache/cassandra/utils/Murmur3BloomFilter.java |   51 +++++
 .../org/apache/cassandra/utils/MurmurHash.java     |  107 +++++++++++-
 .../cassandra/utils/LongBloomFilterTest.java       |   75 ++++++--
 .../cassandra/io/sstable/DescriptorTest.java       |   17 ++-
 .../apache/cassandra/utils/BloomFilterTest.java    |   15 +-
 .../apache/cassandra/utils/FilterTestHelper.java   |    3 +-
 .../cassandra/utils/LegacyBloomFilterTest.java     |    4 +-
 .../apache/cassandra/utils/SerializationsTest.java |   30 ++-
 24 files changed, 529 insertions(+), 165 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 646afd5..8eef80c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,6 +1,7 @@
 1.2-dev
  * Track tombstone expiration and compact when tombstone content is
    higher than a configurable threshold, default 20% (CASSANDRA-3442)
+ * update MurmurHash to version 3 (CASSANDRA-2975)
 
 
 1.1.1-dev

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/db/ColumnIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java b/src/java/org/apache/cassandra/db/ColumnIndex.java
index ff3af19..b4c2f90 100644
--- a/src/java/org/apache/cassandra/db/ColumnIndex.java
+++ b/src/java/org/apache/cassandra/db/ColumnIndex.java
@@ -17,9 +17,6 @@
  */
 package org.apache.cassandra.db;
 
-import java.io.DataOutput;
-import java.io.IOError;
-import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -28,23 +25,23 @@ import java.util.List;
 
 import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.io.sstable.IndexHelper;
-import org.apache.cassandra.io.util.DataOutputBuffer;
 import org.apache.cassandra.io.util.IIterableColumns;
-import org.apache.cassandra.utils.BloomFilter;
+import org.apache.cassandra.utils.Filter;
+import org.apache.cassandra.utils.FilterFactory;
 
 public class ColumnIndex
 {
     public final List<IndexHelper.IndexInfo> columnsIndex;
-    public final BloomFilter bloomFilter;
+    public final Filter bloomFilter;
 
-    private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), BloomFilter.emptyFilter());
+    private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), FilterFactory.emptyFilter());
 
     private ColumnIndex(int estimatedColumnCount)
     {
-        this(new ArrayList<IndexHelper.IndexInfo>(), BloomFilter.getFilter(estimatedColumnCount, 4));
+        this(new ArrayList<IndexHelper.IndexInfo>(), FilterFactory.getFilter(estimatedColumnCount, 4));
     }
 
-    private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex, BloomFilter bloomFilter)
+    private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex, Filter bloomFilter)
     {
         this.columnsIndex = columnsIndex;
         this.bloomFilter = bloomFilter;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/db/RowIndexEntry.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/RowIndexEntry.java b/src/java/org/apache/cassandra/db/RowIndexEntry.java
index 46c6604..0b910c8 100644
--- a/src/java/org/apache/cassandra/db/RowIndexEntry.java
+++ b/src/java/org/apache/cassandra/db/RowIndexEntry.java
@@ -24,14 +24,11 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.cassandra.io.ISerializer;
 import org.apache.cassandra.io.sstable.Descriptor;
 import org.apache.cassandra.io.sstable.IndexHelper;
-import org.apache.cassandra.io.util.FileDataInput;
 import org.apache.cassandra.io.util.FileUtils;
-import org.apache.cassandra.service.StorageService;
-import org.apache.cassandra.utils.BloomFilter;
-import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.Filter;
+import org.apache.cassandra.utils.FilterFactory;
 
 public class RowIndexEntry
 {
@@ -67,7 +64,7 @@ public class RowIndexEntry
         return Collections.<IndexHelper.IndexInfo>emptyList();
     }
 
-    public BloomFilter bloomFilter()
+    public Filter bloomFilter()
     {
         throw new UnsupportedOperationException();
     }
@@ -85,7 +82,7 @@ public class RowIndexEntry
                 dos.writeInt(rie.columnsIndex().size());
                 for (IndexHelper.IndexInfo info : rie.columnsIndex())
                     info.serialize(dos);
-                BloomFilter.serializer().serialize(rie.bloomFilter(), dos);
+                FilterFactory.serialize(rie.bloomFilter(), dos);
             }
             else
             {
@@ -107,7 +104,7 @@ public class RowIndexEntry
                     List<IndexHelper.IndexInfo> columnsIndex = new ArrayList<IndexHelper.IndexInfo>(entries);
                     for (int i = 0; i < entries; i++)
                         columnsIndex.add(IndexHelper.IndexInfo.deserialize(dis));
-                    BloomFilter bf = BloomFilter.serializer().deserialize(dis);
+                    Filter bf = FilterFactory.deserialize(dis, descriptor.filterType);
                     return new IndexedEntry(position, new DeletionInfo(mfda, ldt), columnsIndex, bf);
                 }
                 else
@@ -142,9 +139,9 @@ public class RowIndexEntry
     {
         private final DeletionInfo deletionInfo;
         private final List<IndexHelper.IndexInfo> columnsIndex;
-        private final BloomFilter bloomFilter;
+        private final Filter bloomFilter;
 
-        private IndexedEntry(long position, DeletionInfo deletionInfo, List<IndexHelper.IndexInfo> columnsIndex, BloomFilter bloomFilter)
+        private IndexedEntry(long position, DeletionInfo deletionInfo, List<IndexHelper.IndexInfo> columnsIndex, Filter bloomFilter)
         {
             super(position);
             assert deletionInfo != null;
@@ -167,7 +164,7 @@ public class RowIndexEntry
         }
 
         @Override
-        public BloomFilter bloomFilter()
+        public Filter bloomFilter()
         {
             return bloomFilter;
         }
@@ -178,7 +175,8 @@ public class RowIndexEntry
             size += DBConstants.INT_SIZE; // number of entries
             for (IndexHelper.IndexInfo info : columnsIndex)
                 size += info.serializedSize();
-            return size + (int)bloomFilter.serializedSize();
+
+            return size + (int) FilterFactory.serializedSize(bloomFilter);
         }
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index 4d3a148..068933c 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -134,7 +134,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
         else
         {
             assert file != null;
-            bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.usesOldBloomFilter);
+            bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.filterType);
             indexList = IndexHelper.deserializeIndex(file);
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/Descriptor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/Descriptor.java b/src/java/org/apache/cassandra/io/sstable/Descriptor.java
index 4c9ab2d..dff0e98 100644
--- a/src/java/org/apache/cassandra/io/sstable/Descriptor.java
+++ b/src/java/org/apache/cassandra/io/sstable/Descriptor.java
@@ -22,6 +22,7 @@ import java.util.StringTokenizer;
 
 import com.google.common.base.Objects;
 
+import org.apache.cassandra.utils.FilterFactory;
 import org.apache.cassandra.utils.Pair;
 
 import static org.apache.cassandra.io.sstable.Component.separator;
@@ -55,7 +56,7 @@ public class Descriptor
     // hc (1.0.4): records partitioner in metadata component
     // ia (1.2.0): column indexes are promoted to the index file
     //             records estimated histogram of deletion times in tombstones
-    public static final String CURRENT_VERSION = "ia";
+    public static final String CURRENT_VERSION = "ib";
 
     public final File directory;
     /** version has the following format: <code>[a-z]+</code> */
@@ -70,13 +71,13 @@ public class Descriptor
     public final boolean hasIntRowSize;
     public final boolean hasEncodedKeys;
     public final boolean isLatestVersion;
-    public final boolean usesOldBloomFilter;
     public final boolean metadataIncludesReplayPosition;
     public final boolean tracksMaxTimestamp;
     public final boolean hasCompressionRatio;
     public final boolean hasPartitioner;
     public final boolean tracksTombstones;
     public final boolean hasPromotedIndexes;
+    public final FilterFactory.Type filterType;
 
     /**
      * A descriptor that assumes CURRENT_VERSION.
@@ -100,7 +101,6 @@ public class Descriptor
         hasStringsInBloomFilter = version.compareTo("c") < 0;
         hasIntRowSize = version.compareTo("d") < 0;
         hasEncodedKeys = version.compareTo("e") < 0;
-        usesOldBloomFilter = version.compareTo("f") < 0;
         metadataIncludesReplayPosition = version.compareTo("g") >= 0;
         tracksMaxTimestamp = version.compareTo("h") >= 0;
         hasCompressionRatio = version.compareTo("hb") >= 0;
@@ -108,6 +108,12 @@ public class Descriptor
         tracksTombstones = version.compareTo("ia") >= 0;
         hasPromotedIndexes = version.compareTo("ia") >= 0;
         isLatestVersion = version.compareTo(CURRENT_VERSION) == 0;
+        if (version.compareTo("f") < 0)
+            filterType = FilterFactory.Type.SHA;
+        else if (version.compareTo("ia") <= 0)
+            filterType = FilterFactory.Type.MURMUR2;
+        else
+            filterType = FilterFactory.Type.MURMUR3;
     }
 
     public String filenameFor(Component component)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/IndexHelper.java b/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
index 798dc19..5c7d645 100644
--- a/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
+++ b/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
@@ -104,9 +104,9 @@ public class IndexHelper
         return indexList;
     }
 
-    public static Filter defreezeBloomFilter(FileDataInput file, boolean usesOldBloomFilter) throws IOException
+    public static Filter defreezeBloomFilter(FileDataInput file, FilterFactory.Type type) throws IOException
     {
-        return defreezeBloomFilter(file, Integer.MAX_VALUE, usesOldBloomFilter);
+        return defreezeBloomFilter(file, Integer.MAX_VALUE, type);
     }
 
     /**
@@ -114,14 +114,14 @@ public class IndexHelper
      *
      * @param file - source file
      * @param maxSize - sanity check: if filter claimes to be larger than this it is bogus
-     * @param useOldBuffer - do we need to reuse old buffer?
+     * @param type - Bloom Filter type.
      *
      * @return bloom filter summarizing the column information
      * @throws java.io.IOException if an I/O error occurs.
      * Guarantees that file's current position will be just after the bloom filter, even if
      * the filter cannot be deserialized, UNLESS EOFException is thrown.
      */
-    public static Filter defreezeBloomFilter(FileDataInput file, long maxSize, boolean useOldBuffer) throws IOException
+    public static Filter defreezeBloomFilter(FileDataInput file, long maxSize, FilterFactory.Type type) throws IOException
     {
         int size = file.readInt();
         if (size > maxSize || size <= 0)
@@ -129,9 +129,7 @@ public class IndexHelper
         ByteBuffer bytes = file.readBytes(size);
 
         DataInputStream stream = new DataInputStream(ByteBufferUtil.inputStream(bytes));
-        return useOldBuffer
-               ? LegacyBloomFilter.serializer().deserialize(stream)
-               : BloomFilter.serializer().deserialize(stream);
+        return FilterFactory.deserialize(stream, type);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
index 63b1322..ca04ce9 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
@@ -116,7 +116,7 @@ public class SSTableIdentityIterator implements Comparable<SSTableIdentityIterat
                 {
                     try
                     {
-                        IndexHelper.defreezeBloomFilter(file, dataSize, sstable.descriptor.usesOldBloomFilter);
+                        IndexHelper.defreezeBloomFilter(file, dataSize, sstable.descriptor.filterType);
                     }
                     catch (Exception e)
                     {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
index a41d2a7..864e11f 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
@@ -309,7 +309,7 @@ public class SSTableReader extends SSTable
     {
         if (!components.contains(Component.FILTER))
         {
-            bf = BloomFilter.emptyFilter();
+            bf = FilterFactory.emptyFilter();
             return;
         }
 
@@ -317,14 +317,7 @@ public class SSTableReader extends SSTable
         try
         {
             stream = new DataInputStream(new BufferedInputStream(new FileInputStream(descriptor.filenameFor(Component.FILTER))));
-            if (descriptor.usesOldBloomFilter)
-            {
-                bf = LegacyBloomFilter.serializer().deserialize(stream);
-            }
-            else
-            {
-                bf = BloomFilter.serializer().deserialize(stream);
-            }
+            bf = FilterFactory.deserialize(stream, descriptor.filterType);
         }
         finally
         {
@@ -455,10 +448,7 @@ public class SSTableReader extends SSTable
 
     public long getBloomFilterSerializedSize()
     {
-        if (descriptor.usesOldBloomFilter)
-            return LegacyBloomFilter.serializer().serializedSize((LegacyBloomFilter) bf);
-        else
-            return BloomFilter.serializer().serializedSize((BloomFilter) bf);
+        return FilterFactory.serializedSize(bf, descriptor.filterType);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
index 9144044..55182d9 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
@@ -368,7 +368,7 @@ public class SSTableWriter extends SSTable
         private final SequentialWriter indexFile;
         public final SegmentedFile.Builder builder;
         public final IndexSummary summary;
-        public final BloomFilter bf;
+        public final Filter bf;
         private FileMark mark;
 
         IndexWriter(long keyCount) throws IOException
@@ -384,9 +384,8 @@ public class SSTableWriter extends SSTable
                 logger.error("Bloom filter FP chance of zero isn't supposed to happen");
                 fpChance = null;
             }
-            bf = fpChance == null
-               ? BloomFilter.getFilter(keyCount, 15)
-               : BloomFilter.getFilter(keyCount, fpChance);
+            bf = fpChance == null ? FilterFactory.getFilter(keyCount, 15)
+                                  : FilterFactory.getFilter(keyCount, fpChance);
         }
 
         public void append(DecoratedKey key, RowIndexEntry indexEntry) throws IOException
@@ -410,7 +409,7 @@ public class SSTableWriter extends SSTable
             // bloom filter
             FileOutputStream fos = new FileOutputStream(descriptor.filenameFor(SSTable.COMPONENT_FILTER));
             DataOutputStream stream = new DataOutputStream(fos);
-            BloomFilter.serializer().serialize(bf, stream);
+            FilterFactory.serialize(bf, stream, descriptor.filterType);
             stream.flush();
             fos.getFD().sync();
             stream.close();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BloomFilter.java b/src/java/org/apache/cassandra/utils/BloomFilter.java
index f664264..3ca62e5 100644
--- a/src/java/org/apache/cassandra/utils/BloomFilter.java
+++ b/src/java/org/apache/cassandra/utils/BloomFilter.java
@@ -19,91 +19,45 @@ package org.apache.cassandra.utils;
 
 import java.nio.ByteBuffer;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.cassandra.utils.obs.OpenBitSet;
 
-public class BloomFilter extends Filter
+public abstract class BloomFilter extends Filter
 {
-    private static final Logger logger = LoggerFactory.getLogger(BloomFilter.class);
     private static final int EXCESS = 20;
-    static final BloomFilterSerializer serializer = new BloomFilterSerializer();
 
     public final OpenBitSet bitset;
 
-    BloomFilter(int hashes, OpenBitSet bs)
+    BloomFilter(int hashes, long numElements, int bucketsPer)
     {
         hashCount = hashes;
-        bitset = bs;
-    }
-
-    public static BloomFilter emptyFilter()
-    {
-        return new BloomFilter(0, bucketsFor(0, 0));
-    }
-
-    public static BloomFilterSerializer serializer()
-    {
-        return serializer;
+        bitset = new OpenBitSet(numElements * bucketsPer + EXCESS);
     }
 
-    private static OpenBitSet bucketsFor(long numElements, int bucketsPer)
+    BloomFilter(int hashes, OpenBitSet bitset)
     {
-        return new OpenBitSet(numElements * bucketsPer + EXCESS);
-    }
-
-    /**
-    * @return A BloomFilter with the lowest practical false positive probability
-    * for the given number of elements.
-    */
-    public static BloomFilter getFilter(long numElements, int targetBucketsPerElem)
-    {
-        int maxBucketsPerElement = Math.max(1, BloomCalculations.maxBucketsPerElement(numElements));
-        int bucketsPerElement = Math.min(targetBucketsPerElem, maxBucketsPerElement);
-        if (bucketsPerElement < targetBucketsPerElem)
-        {
-            logger.warn(String.format("Cannot provide an optimal BloomFilter for %d elements (%d/%d buckets per element).",
-                                      numElements, bucketsPerElement, targetBucketsPerElem));
-        }
-        BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement);
-        if (logger.isTraceEnabled())
-            logger.trace("Creating bloom filter for {} elements and spec {}", numElements, spec);
-        return new BloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
-    }
-
-    /**
-    * @return The smallest BloomFilter that can provide the given false positive
-    * probability rate for the given number of elements.
-    *
-    * Asserts that the given probability can be satisfied using this filter.
-    */
-    public static BloomFilter getFilter(long numElements, double maxFalsePosProbability)
-    {
-        assert maxFalsePosProbability <= 1.0 : "Invalid probability";
-        int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
-        BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
-        return new BloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
+        this.hashCount = hashes;
+        this.bitset = bitset;
     }
 
     private long[] getHashBuckets(ByteBuffer key)
     {
-        return BloomFilter.getHashBuckets(key, hashCount, bitset.size());
+        return getHashBuckets(key, hashCount, bitset.size());
     }
 
+    protected abstract long[] hash(ByteBuffer b, int position, int remaining, long seed);
+
     // Murmur is faster than an SHA-based approach and provides as-good collision
     // resistance.  The combinatorial generation approach described in
     // http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
     // does prove to work in actual tests, and is obviously faster
     // than performing further iterations of murmur.
-    static long[] getHashBuckets(ByteBuffer b, int hashCount, long max)
+    long[] getHashBuckets(ByteBuffer b, int hashCount, long max)
     {
         long[] result = new long[hashCount];
-        long hash1 = MurmurHash.hash64(b, b.position(), b.remaining(), 0L);
-        long hash2 = MurmurHash.hash64(b, b.position(), b.remaining(), hash1);
+        long[] hash = this.hash(b, b.position(), b.remaining(), 0L);
         for (int i = 0; i < hashCount; ++i)
         {
-            result[i] = Math.abs((hash1 + (long)i * hash2) % max);
+            result[i] = Math.abs((hash[0] + (long)i * hash[1]) % max);
         }
         return result;
     }
@@ -132,9 +86,4 @@ public class BloomFilter extends Filter
     {
         bitset.clear(0, bitset.size());
     }
-
-    public long serializedSize()
-    {
-        return serializer.serializedSize(this);
-    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java b/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
index c0834a6..4d3c4af 100644
--- a/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
+++ b/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
@@ -25,7 +25,7 @@ import org.apache.cassandra.db.DBConstants;
 import org.apache.cassandra.io.ISerializer;
 import org.apache.cassandra.utils.obs.OpenBitSet;
 
-public class BloomFilterSerializer implements ISerializer<BloomFilter>
+abstract class BloomFilterSerializer implements ISerializer<BloomFilter>
 {
     public void serialize(BloomFilter bf, DataOutput dos) throws IOException
     {
@@ -59,9 +59,11 @@ public class BloomFilterSerializer implements ISerializer<BloomFilter>
                 bits[i] = dis.readLong();
         }
 
-        return new BloomFilter(hashes, bs);
+        return createFilter(hashes, bs);
     }
 
+    protected abstract BloomFilter createFilter(int hashes, OpenBitSet bs);
+
     /**
      * Calculates a serialized size of the given Bloom Filter
      * @see BloomFilterSerializer#serialize(BloomFilter, DataOutput)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/Filter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/Filter.java b/src/java/org/apache/cassandra/utils/Filter.java
index 138cb31..f7ce1f3 100644
--- a/src/java/org/apache/cassandra/utils/Filter.java
+++ b/src/java/org/apache/cassandra/utils/Filter.java
@@ -31,4 +31,6 @@ public abstract class Filter
     public abstract void add(ByteBuffer key);
 
     public abstract boolean isPresent(ByteBuffer key);
+
+    public abstract void clear();
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/FilterFactory.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/FilterFactory.java b/src/java/org/apache/cassandra/utils/FilterFactory.java
new file mode 100644
index 0000000..93c642f
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/FilterFactory.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FilterFactory
+{
+    private static final Logger logger = LoggerFactory.getLogger(FilterFactory.class);
+
+    public enum Type
+    {
+        SHA, MURMUR2, MURMUR3
+    }
+
+    public static void serialize(Filter bf, DataOutput output) throws IOException
+    {
+        serialize(bf, output, Type.MURMUR3);
+    }
+
+    public static void serialize(Filter bf, DataOutput output, Type type) throws IOException
+    {
+        switch (type)
+        {
+            case SHA:
+                LegacyBloomFilter.serializer.serialize((LegacyBloomFilter) bf, output);
+                break;
+            case MURMUR2:
+                Murmur2BloomFilter.serializer.serialize((Murmur2BloomFilter) bf, output);
+                break;
+            default:
+                Murmur3BloomFilter.serializer.serialize((Murmur3BloomFilter) bf, output);
+                break;
+        }
+    }
+
+    public static Filter deserialize(DataInput input, Type type) throws IOException
+    {
+        switch (type)
+        {
+            case SHA:
+                return LegacyBloomFilter.serializer.deserialize(input);
+            case MURMUR2:
+                return Murmur2BloomFilter.serializer.deserialize(input);
+            default:
+                return Murmur3BloomFilter.serializer.deserialize(input);
+        }
+    }
+
+    public static long serializedSize(Filter bf)
+    {
+        return serializedSize(bf, Type.MURMUR3);
+    }
+
+    public static long serializedSize(Filter bf, Type type)
+    {
+        switch (type)
+        {
+            case SHA:
+                return LegacyBloomFilter.serializer.serializedSize((LegacyBloomFilter) bf);
+            case MURMUR2:
+                return Murmur2BloomFilter.serializer.serializedSize((Murmur2BloomFilter) bf);
+            default:
+                return Murmur3BloomFilter.serializer.serializedSize((Murmur3BloomFilter) bf);
+        }
+    }
+
+    /**
+     * @return A BloomFilter with the lowest practical false positive
+     *         probability for the given number of elements.
+     */
+    public static Filter getFilter(long numElements, int targetBucketsPerElem)
+    {
+        return getFilter(numElements, targetBucketsPerElem, Type.MURMUR3);
+    }
+
+    // helper method for test.
+    static Filter getFilter(long numElements, int targetBucketsPerElem, Type type)
+    {
+        int maxBucketsPerElement = Math.max(1, BloomCalculations.maxBucketsPerElement(numElements));
+        int bucketsPerElement = Math.min(targetBucketsPerElem, maxBucketsPerElement);
+        if (bucketsPerElement < targetBucketsPerElem)
+        {
+            logger.warn(String.format("Cannot provide an optimal BloomFilter for %d elements (%d/%d buckets per element).", numElements, bucketsPerElement, targetBucketsPerElem));
+        }
+        BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement);
+        return createFilter(spec.K, numElements, spec.bucketsPerElement, type);
+    }
+
+    /**
+     * @return The smallest BloomFilter that can provide the given false
+     *         positive probability rate for the given number of elements.
+     *
+     *         Asserts that the given probability can be satisfied using this
+     *         filter.
+     */
+    public static Filter getFilter(long numElements, double maxFalsePosProbability)
+    {
+        return getFilter(numElements, maxFalsePosProbability, Type.MURMUR3);
+    }
+
+    // helper method for test.
+    static Filter getFilter(long numElements, double maxFalsePosProbability, Type type)
+    {
+        assert maxFalsePosProbability <= 1.0 : "Invalid probability";
+        int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+        BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
+        return createFilter(spec.K, numElements, spec.bucketsPerElement, type);
+    }
+
+    private static Filter createFilter(int hash, long numElements, int bucketsPer, Type type)
+    {
+        switch (type)
+        {
+            case MURMUR2:
+              return new Murmur2BloomFilter(hash, numElements, bucketsPer);
+            default:
+              return new Murmur3BloomFilter(hash, numElements, bucketsPer);
+        }
+    }
+
+    public static BloomFilter emptyFilter()
+    {
+        return new Murmur3BloomFilter(0, 0, 0);
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java b/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
index 9942fbc..6f7269e 100644
--- a/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
+++ b/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
@@ -27,12 +27,7 @@ public class LegacyBloomFilter extends Filter
 {
     private static final int EXCESS = 20;
     private static final Logger logger = LoggerFactory.getLogger(LegacyBloomFilter.class);
-    static final LegacyBloomFilterSerializer serializer = new LegacyBloomFilterSerializer();
-
-    public static LegacyBloomFilterSerializer serializer()
-    {
-        return serializer;
-    }
+    public static final LegacyBloomFilterSerializer serializer = new LegacyBloomFilterSerializer();
 
     private BitSet filter;
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java b/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
index a556876..a0de19b 100644
--- a/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
+++ b/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
@@ -32,10 +32,17 @@ public class LegacyBloomFilterSerializer
 //        oos.flush();
     }
 
-    public LegacyBloomFilter deserialize(DataInputStream dis) throws IOException
+    public LegacyBloomFilter deserialize(final DataInput dis) throws IOException
     {
         int hashes = dis.readInt();
-        ObjectInputStream ois = new ObjectInputStream(dis);
+        ObjectInputStream ois = new ObjectInputStream(new InputStream()
+        {
+            @Override
+            public int read() throws IOException
+            {
+                return dis.readByte() & 0xFF;
+            }
+        });
         try
         {
           BitSet bs = (BitSet) ois.readObject();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java b/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java
new file mode 100644
index 0000000..df5a160
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.nio.ByteBuffer;
+
+import org.apache.cassandra.io.ISerializer;
+import org.apache.cassandra.utils.obs.OpenBitSet;
+
+public class Murmur2BloomFilter extends BloomFilter
+{
+    public static final ISerializer<BloomFilter> serializer = new Murmur2BloomFilterSerializer();
+
+    Murmur2BloomFilter(int hashes, long numElements, int bucketsPer)
+    {
+        super(hashes, numElements, bucketsPer);
+    }
+
+    private Murmur2BloomFilter(int hashes, OpenBitSet bs)
+    {
+        super(hashes, bs);
+    }
+
+    protected long[] hash(ByteBuffer b, int position, int remaining, long seed)
+    {
+        long hash1 = MurmurHash.hash2_64(b, b.position(), b.remaining(), seed);
+        long hash2 = MurmurHash.hash2_64(b, b.position(), b.remaining(), hash1);
+        return (new long[] { hash1, hash2 });
+    }
+
+    private static class Murmur2BloomFilterSerializer extends BloomFilterSerializer
+    {
+        protected BloomFilter createFilter(int hashes, OpenBitSet bs)
+        {
+            return new Murmur2BloomFilter(hashes, bs);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java b/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java
new file mode 100644
index 0000000..304842a
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.nio.ByteBuffer;
+
+import org.apache.cassandra.io.ISerializer;
+import org.apache.cassandra.utils.obs.OpenBitSet;
+
+public class Murmur3BloomFilter extends BloomFilter
+{
+    public static final ISerializer<BloomFilter> serializer = new Murmur3BloomFilterSerializer();
+
+    Murmur3BloomFilter(int hashes, long numElements, int bucketsPer)
+    {
+        super(hashes, numElements, bucketsPer);
+    }
+
+    private Murmur3BloomFilter(int hashes, OpenBitSet bs)
+    {
+        super(hashes, bs);
+    }
+
+    protected long[] hash(ByteBuffer b, int position, int remaining, long seed)
+    {
+        return MurmurHash.hash3_x64_128(b, b.position(), b.remaining(), seed);
+    }
+
+    private static class Murmur3BloomFilterSerializer extends BloomFilterSerializer
+    {
+        protected BloomFilter createFilter(int hashes, OpenBitSet bs)
+        {
+            return new Murmur3BloomFilter(hashes, bs);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/MurmurHash.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/MurmurHash.java b/src/java/org/apache/cassandra/utils/MurmurHash.java
index 36f9ab5..d2a3f37 100644
--- a/src/java/org/apache/cassandra/utils/MurmurHash.java
+++ b/src/java/org/apache/cassandra/utils/MurmurHash.java
@@ -23,6 +23,9 @@ import java.nio.ByteBuffer;
  * This is a very fast, non-cryptographic hash suitable for general hash-based
  * lookup. See http://murmurhash.googlepages.com/ for more details.
  *
+ * hash32() and hash64() are MurmurHash 2.0.
+ * hash3_x64_128() is MurmurHash 3.0.
+ *
  * <p>
  * The C version of MurmurHash 2.0 found at that site was ported to Java by
  * Andrzej Bialecki (ab at getopt org).
@@ -85,7 +88,7 @@ public class MurmurHash
         return h;
     }
 
-    public static long hash64(ByteBuffer key, int offset, int length, long seed)
+    public static long hash2_64(ByteBuffer key, int offset, int length, long seed)
     {
         long m64 = 0xc6a4a7935bd1e995L;
         int r64 = 47;
@@ -140,4 +143,106 @@ public class MurmurHash
 
         return h64;
     }
+
+    protected static long getblock(ByteBuffer key, int offset, int index)
+    {
+        int i_8 = index << 3;
+        return ((long) key.get(offset + i_8 + 0) & 0xff) + (((long) key.get(offset + i_8 + 1) & 0xff) << 8) +
+               (((long) key.get(offset + i_8 + 2) & 0xff) << 16) + (((long) key.get(offset + i_8 + 3) & 0xff) << 24) +
+               (((long) key.get(offset + i_8 + 4) & 0xff) << 32) + (((long) key.get(offset + i_8 + 5) & 0xff) << 40) +
+               (((long) key.get(offset + i_8 + 6) & 0xff) << 48) + (((long) key.get(offset + i_8 + 7) & 0xff) << 56);
+    }
+
+    protected static long rotl64(long v, int n)
+    {
+        return ((v << n) | (v >>> (64 - n)));
+    }
+
+    protected static long fmix(long k)
+    {
+        k ^= k >>> 33;
+        k *= 0xff51afd7ed558ccdL;
+        k ^= k >>> 33;
+        k *= 0xc4ceb9fe1a85ec53L;
+        k ^= k >>> 33;
+
+        return k;
+    }
+
+    public static long[] hash3_x64_128(ByteBuffer key, int offset, int length, long seed)
+    {
+        final int nblocks = length >> 4; // Process as 128-bit blocks.
+
+        long h1 = seed;
+        long h2 = seed;
+
+        long c1 = 0x87c37b91114253d5L;
+        long c2 = 0x4cf5ad432745937fL;
+
+        //----------
+        // body
+
+        for(int i = 0; i < nblocks; i++)
+        {
+            int i_8 = i << 4;
+
+            long k1 = getblock(key, offset, i*2+0);
+            long k2 = getblock(key, offset, i*2+1);
+
+            k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1;
+
+            h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+            k2 *= c2; k2  = rotl64(k2,33); k2 *= c1; h2 ^= k2;
+
+            h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+        }
+
+        //----------
+        // tail
+
+        // Advance offset to the unprocessed tail of the data.
+        offset += nblocks * 16;
+
+        long k1 = 0;
+        long k2 = 0;
+
+        switch(length & 15)
+        {
+            case 15: k2 ^= ((long) key.get(offset+14)) << 48;
+            case 14: k2 ^= ((long) key.get(offset+13)) << 40;
+            case 13: k2 ^= ((long) key.get(offset+12)) << 32;
+            case 12: k2 ^= ((long) key.get(offset+11)) << 24;
+            case 11: k2 ^= ((long) key.get(offset+10)) << 16;
+            case 10: k2 ^= ((long) key.get(offset+9)) << 8;
+            case  9: k2 ^= ((long) key.get(offset+8)) << 0;
+                k2 *= c2; k2  = rotl64(k2,33); k2 *= c1; h2 ^= k2;
+
+            case  8: k1 ^= ((long) key.get(offset+7)) << 56;
+            case  7: k1 ^= ((long) key.get(offset+6)) << 48;
+            case  6: k1 ^= ((long) key.get(offset+5)) << 40;
+            case  5: k1 ^= ((long) key.get(offset+4)) << 32;
+            case  4: k1 ^= ((long) key.get(offset+3)) << 24;
+            case  3: k1 ^= ((long) key.get(offset+2)) << 16;
+            case  2: k1 ^= ((long) key.get(offset+1)) << 8;
+            case  1: k1 ^= ((long) key.get(offset));
+                k1 *= c1; k1  = rotl64(k1,31); k1 *= c2; h1 ^= k1;
+        };
+
+        //----------
+        // finalization
+
+        h1 ^= length; h2 ^= length;
+
+        h1 += h2;
+        h2 += h1;
+
+        h1 = fmix(h1);
+        h2 = fmix(h2);
+
+        h1 += h2;
+        h2 += h1;
+
+        return(new long[] {h1, h2});
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
----------------------------------------------------------------------
diff --git a/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java b/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
index e5a9e22..d4a4c34 100644
--- a/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
+++ b/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
@@ -21,45 +21,82 @@ package org.apache.cassandra.utils;
 import java.util.Random;
 
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class LongBloomFilterTest
 {
-    public BloomFilter bf;
+    private static final Logger logger = LoggerFactory.getLogger(LongBloomFilterTest.class);
 
     /**
      * NB: needs to run with -mx1G
      */
-    @Test
-    public void testBigInt()
+    public void testBigInt(FilterFactory.Type type)
     {
         int size = 10 * 1000 * 1000;
-        bf = BloomFilter.getFilter(size, FilterTestHelper.spec.bucketsPerElement);
-        FilterTestHelper.testFalsePositives(bf,
-                                            new KeyGenerator.IntGenerator(size),
-                                            new KeyGenerator.IntGenerator(size, size * 2));
+        Filter bf = FilterFactory.getFilter(size, FilterTestHelper.spec.bucketsPerElement, type);
+        double fp = FilterTestHelper.testFalsePositives(bf, new KeyGenerator.IntGenerator(size),
+                                                            new KeyGenerator.IntGenerator(size, size * 2));
+        logger.info("Bloom filter false positive: {}", fp);
     }
 
-    @Test
-    public void testBigRandom()
+    public void testBigRandom(FilterFactory.Type type)
     {
         int size = 10 * 1000 * 1000;
-        bf = BloomFilter.getFilter(size, FilterTestHelper.spec.bucketsPerElement);
-        FilterTestHelper.testFalsePositives(bf,
-                                            new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
-                                            new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+        Filter bf = FilterFactory.getFilter(size, FilterTestHelper.spec.bucketsPerElement, type);
+        double fp = FilterTestHelper.testFalsePositives(bf, new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+                                                            new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+        logger.info("Bloom filter false positive: {}", fp);
     }
 
-    @Test
-    public void timeit()
+    public void timeit(FilterFactory.Type type)
     {
         int size = 300 * FilterTestHelper.ELEMENTS;
-        bf = BloomFilter.getFilter(size, FilterTestHelper.spec.bucketsPerElement);
+        Filter bf = FilterFactory.getFilter(size, FilterTestHelper.spec.bucketsPerElement, type);
+        double sumfp = 0;
         for (int i = 0; i < 10; i++)
         {
-            FilterTestHelper.testFalsePositives(bf,
-                                                new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
-                                                new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+            FilterTestHelper.testFalsePositives(bf, new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+                                                    new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+
             bf.clear();
         }
+        logger.info("Bloom filter mean false positive: {}", sumfp/10);
+    }
+
+    @Test
+    public void testBigIntMurm2()
+    {
+        testBigInt(FilterFactory.Type.MURMUR2);
+    }
+
+    @Test
+    public void testBigRandomMurm2()
+    {
+        testBigRandom(FilterFactory.Type.MURMUR2);
+    }
+
+    @Test
+    public void timeitMurm2()
+    {
+        timeit(FilterFactory.Type.MURMUR2);
+    }
+
+    @Test
+    public void testBigIntMurm3()
+    {
+        testBigInt(FilterFactory.Type.MURMUR3);
+    }
+
+    @Test
+    public void testBigRandomMurm3()
+    {
+        testBigRandom(FilterFactory.Type.MURMUR3);
+    }
+
+    @Test
+    public void timeitMurm3()
+    {
+        timeit(FilterFactory.Type.MURMUR3);
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java b/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
index 525ecde..6861007 100644
--- a/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
@@ -20,8 +20,9 @@ package org.apache.cassandra.io.sstable;
  *
  */
 
-
+import org.apache.cassandra.utils.FilterFactory;
 import org.junit.Test;
+import static org.junit.Assert.*;
 
 public class DescriptorTest
 {
@@ -31,7 +32,7 @@ public class DescriptorTest
         Descriptor descriptor = Descriptor.fromFilename("Keyspace1-userActionUtilsKey-9-Data.db");
 
         assert descriptor.version.equals(Descriptor.LEGACY_VERSION);
-        assert descriptor.usesOldBloomFilter;
+        assert descriptor.filterType == FilterFactory.Type.SHA;
     }
 
     @Test
@@ -52,4 +53,16 @@ public class DescriptorTest
         assert "gz".equals(desc.version);
         assert !desc.tracksMaxTimestamp;
     }
+
+    @Test
+    public void testMurmurBloomFilter()
+    {
+        Descriptor desc = Descriptor.fromFilename("Keyspace1-Standard1-ia-1-Data.db");
+        assertEquals("ia", desc.version);
+        assertEquals(desc.filterType, FilterFactory.Type.MURMUR2);
+
+        desc = Descriptor.fromFilename("Keyspace1-Standard1-ib-1-Data.db");
+        assertEquals("ib", desc.version);
+        assertEquals(desc.filterType, FilterFactory.Type.MURMUR3);
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/BloomFilterTest.java b/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
index 31a3369..d8f596f 100644
--- a/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
+++ b/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
@@ -33,21 +33,21 @@ import org.junit.Test;
 
 public class BloomFilterTest
 {
-    public BloomFilter bf;
+    public Filter bf;
 
     public BloomFilterTest()
     {
-        bf = BloomFilter.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE);
+        bf = FilterFactory.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE);
     }
 
-    public static BloomFilter testSerialize(BloomFilter f) throws IOException
+    public static Filter testSerialize(Filter f) throws IOException
     {
         f.add(ByteBufferUtil.bytes("a"));
         DataOutputBuffer out = new DataOutputBuffer();
-        f.serializer().serialize(f, out);
+        FilterFactory.serialize(f, out, FilterFactory.Type.MURMUR3);
 
         ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, out.getLength());
-        BloomFilter f2 = f.serializer().deserialize(new DataInputStream(in));
+        Filter f2 = FilterFactory.deserialize(new DataInputStream(in), FilterFactory.Type.MURMUR3);
 
         assert f2.isPresent(ByteBufferUtil.bytes("a"));
         assert !f2.isPresent(ByteBufferUtil.bytes("b"));
@@ -101,7 +101,7 @@ public class BloomFilterTest
         {
             return;
         }
-        BloomFilter bf2 = BloomFilter.getFilter(KeyGenerator.WordGenerator.WORDS / 2, FilterTestHelper.MAX_FAILURE_RATE);
+        Filter bf2 = FilterFactory.getFilter(KeyGenerator.WordGenerator.WORDS / 2, FilterTestHelper.MAX_FAILURE_RATE);
         int skipEven = KeyGenerator.WordGenerator.WORDS % 2 == 0 ? 0 : 2;
         FilterTestHelper.testFalsePositives(bf2,
                                             new KeyGenerator.WordGenerator(skipEven, 2),
@@ -123,7 +123,8 @@ public class BloomFilterTest
         {
             hashes.clear();
             ByteBuffer buf = keys.next();
-            for (long hashIndex : BloomFilter.getHashBuckets(buf, MAX_HASH_COUNT, 1024 * 1024))
+            BloomFilter bf = (BloomFilter) FilterFactory.getFilter(10, 10);
+            for (long hashIndex : bf.getHashBuckets(buf, MAX_HASH_COUNT, 1024 * 1024))
             {
                 hashes.add(hashIndex);
             }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/FilterTestHelper.java b/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
index 85294f6..fd6f873 100644
--- a/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
+++ b/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
@@ -43,7 +43,7 @@ public class FilterTestHelper
         return new KeyGenerator.RandomStringGenerator(271828, ELEMENTS);
     }
 
-    public static void testFalsePositives(Filter f, ResetableIterator<ByteBuffer> keys, ResetableIterator<ByteBuffer> otherkeys)
+    public static double testFalsePositives(Filter f, ResetableIterator<ByteBuffer> keys, ResetableIterator<ByteBuffer> otherkeys)
     {
         assert keys.size() == otherkeys.size();
 
@@ -63,6 +63,7 @@ public class FilterTestHelper
 
         double fp_ratio = fp / (keys.size() * BloomCalculations.probs[spec.bucketsPerElement][spec.K]);
         assert fp_ratio < 1.03 : fp_ratio;
+        return fp_ratio;
     }
 
     public void testTrue()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java b/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
index fe52dda..248f325 100644
--- a/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
+++ b/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
@@ -43,10 +43,10 @@ public class LegacyBloomFilterTest
     {
         f.add(ByteBufferUtil.bytes("a"));
         DataOutputBuffer out = new DataOutputBuffer();
-        f.serializer().serialize(f, out);
+        FilterFactory.serialize(f, out, FilterFactory.Type.SHA);
 
         ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, out.getLength());
-        LegacyBloomFilter f2 = f.serializer().deserialize(new DataInputStream(in));
+        LegacyBloomFilter f2 = (LegacyBloomFilter) FilterFactory.deserialize(new DataInputStream(in), FilterFactory.Type.SHA);
 
         assert f2.isPresent(ByteBufferUtil.bytes("a"));
         assert !f2.isPresent(ByteBufferUtil.bytes("b"));

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/SerializationsTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/SerializationsTest.java b/test/unit/org/apache/cassandra/utils/SerializationsTest.java
index d2c85c0..c30daef 100644
--- a/test/unit/org/apache/cassandra/utils/SerializationsTest.java
+++ b/test/unit/org/apache/cassandra/utils/SerializationsTest.java
@@ -23,6 +23,7 @@ package org.apache.cassandra.utils;
 
 import org.apache.cassandra.AbstractSerializationsTester;
 import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.FilterFactory.Type;
 import org.junit.Test;
 
 import java.io.DataInputStream;
@@ -33,24 +34,35 @@ import java.nio.ByteBuffer;
 public class SerializationsTest extends AbstractSerializationsTester
 {
 
-    private void testBloomFilterWrite() throws IOException
+    private void testBloomFilterWrite(Type murmur) throws IOException
     {
-        BloomFilter bf = BloomFilter.getFilter(1000000, 0.0001);
+        Filter bf = FilterFactory.getFilter(1000000, 0.0001);
         for (int i = 0; i < 100; i++)
             bf.add(StorageService.getPartitioner().getTokenFactory().toByteArray(StorageService.getPartitioner().getRandomToken()));
         DataOutputStream out = getOutput("utils.BloomFilter.bin");
-        BloomFilter.serializer().serialize(bf, out);
+        FilterFactory.serialize(bf, out, murmur);
         out.close();
     }
 
     @Test
-    public void testBloomFilterRead() throws IOException
+    public void testBloomFilterReadMURMUR2() throws IOException
     {
         if (EXECUTE_WRITES)
-            testBloomFilterWrite();
+            testBloomFilterWrite(FilterFactory.Type.MURMUR2);
 
         DataInputStream in = getInput("utils.BloomFilter.bin");
-        assert BloomFilter.serializer().deserialize(in) != null;
+        assert FilterFactory.deserialize(in, FilterFactory.Type.MURMUR2) != null;
+        in.close();
+    }
+
+    @Test
+    public void testBloomFilterReadMURMUR3() throws IOException
+    {
+        if (EXECUTE_WRITES)
+            testBloomFilterWrite(FilterFactory.Type.MURMUR3);
+
+        DataInputStream in = getInput("utils.BloomFilter.bin");
+        assert FilterFactory.deserialize(in, FilterFactory.Type.MURMUR3) != null;
         in.close();
     }
 
@@ -65,8 +77,8 @@ public class SerializationsTest extends AbstractSerializationsTester
             b.add(key);
         }
         DataOutputStream out = getOutput("utils.LegacyBloomFilter.bin");
-        LegacyBloomFilter.serializer().serialize(a, out);
-        LegacyBloomFilter.serializer().serialize(b, out);
+        FilterFactory.serialize(a, out, FilterFactory.Type.SHA);
+        FilterFactory.serialize(b, out, FilterFactory.Type.SHA);
         out.close();
     }
 
@@ -77,7 +89,7 @@ public class SerializationsTest extends AbstractSerializationsTester
             testLegacyBloomFilterWrite();
 
         DataInputStream in = getInput("utils.LegacyBloomFilter.bin");
-        assert LegacyBloomFilter.serializer().deserialize(in) != null;
+        assert FilterFactory.deserialize(in, FilterFactory.Type.SHA) != null;
         in.close();
     }
 


Mime
View raw message