geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sbawas...@apache.org
Subject [5/5] incubator-geode git commit: GEODE-970: Remove duplicate HLL classes
Date Thu, 18 Feb 2016 14:51:59 GMT
GEODE-970: Remove duplicate HLL classes

HLL classes were duplicated in com.gemstone.gemfire.internal.redis.hll and com.gemstone.gemfire.cache.hdfs.internal.cardinality packages.
They now live in com.gemstone.gemfire.internal.hll package.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/e685fd85
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/e685fd85
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/e685fd85

Branch: refs/heads/develop
Commit: e685fd85ac7e2607f70b47bfb448b1d91a56b103
Parents: 92be457
Author: Swapnil Bawaskar <sbawaskar@pivotal.io>
Authored: Wed Feb 17 13:28:13 2016 -0800
Committer: Swapnil Bawaskar <sbawaskar@pivotal.io>
Committed: Thu Feb 18 06:51:36 2016 -0800

----------------------------------------------------------------------
 .../cache/hdfs/internal/cardinality/Bits.java   |   54 -
 .../cardinality/CardinalityMergeException.java  |   42 -
 .../hdfs/internal/cardinality/HyperLogLog.java  |  313 -----
 .../hdfs/internal/cardinality/IBuilder.java     |   41 -
 .../hdfs/internal/cardinality/ICardinality.java |   87 --
 .../hdfs/internal/cardinality/MurmurHash.java   |  261 -----
 .../hdfs/internal/cardinality/RegisterSet.java  |  136 ---
 .../hdfs/internal/hoplog/AbstractHoplog.java    |    2 +-
 .../hdfs/internal/hoplog/HFileSortedOplog.java  |    4 +-
 .../hoplog/HdfsSortedOplogOrganizer.java        |    8 +-
 .../cache/hdfs/internal/hoplog/Hoplog.java      |    3 +-
 .../internal/hoplog/SequenceFileHoplog.java     |    2 +-
 .../com/gemstone/gemfire/internal/hll/Bits.java |   65 ++
 .../internal/hll/CardinalityMergeException.java |   42 +
 .../gemfire/internal/hll/HyperLogLog.java       |  362 ++++++
 .../gemfire/internal/hll/HyperLogLogPlus.java   | 1070 ++++++++++++++++++
 .../gemstone/gemfire/internal/hll/IBuilder.java |   41 +
 .../gemfire/internal/hll/ICardinality.java      |   89 ++
 .../gemfire/internal/hll/MurmurHash.java        |  261 +++++
 .../gemfire/internal/hll/RegisterSet.java       |  126 +++
 .../gemfire/internal/redis/RegionProvider.java  |    2 +-
 .../internal/redis/executor/hll/Bits.java       |   65 --
 .../executor/hll/CardinalityMergeException.java |   42 -
 .../redis/executor/hll/HyperLogLog.java         |  360 ------
 .../redis/executor/hll/HyperLogLogPlus.java     | 1068 -----------------
 .../internal/redis/executor/hll/IBuilder.java   |   41 -
 .../redis/executor/hll/ICardinality.java        |   89 --
 .../internal/redis/executor/hll/MurmurHash.java |  234 ----
 .../redis/executor/hll/PFAddExecutor.java       |    1 +
 .../redis/executor/hll/PFCountExecutor.java     |    2 +
 .../redis/executor/hll/PFMergeExecutor.java     |    2 +
 .../redis/executor/hll/RegisterSet.java         |  126 ---
 .../gemfire/redis/GemFireRedisServer.java       |    2 +-
 33 files changed, 2073 insertions(+), 2970 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java
deleted file mode 100644
index 4fa3443..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/Bits.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
-import java.io.IOException;
-
-public class Bits
-{
-
-    public static int[] getBits(byte[] mBytes) throws IOException
-    {
-        int bitSize = mBytes.length / 4;
-        int[] bits = new int[bitSize];
-        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(mBytes));
-        for (int i = 0; i < bitSize; i++)
-        {
-            bits[i] = dis.readInt();
-        }
-        return bits;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java
deleted file mode 100644
index a63f8e6..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/CardinalityMergeException.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-@SuppressWarnings("serial")
-public abstract class CardinalityMergeException extends Exception
-{
-    public CardinalityMergeException(String message)
-    {
-        super(message);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java
deleted file mode 100644
index 6d945bc..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/HyperLogLog.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2012 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-
-/**
- * Java implementation of HyperLogLog (HLL) algorithm from this paper:
- * <p/>
- * http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
- * <p/>
- * HLL is an improved version of LogLog that is capable of estimating
- * the cardinality of a set with accuracy = 1.04/sqrt(m) where
- * m = 2^b.  So we can control accuracy vs space usage by increasing
- * or decreasing b.
- * <p/>
- * The main benefit of using HLL over LL is that it only requires 64%
- * of the space that LL does to get the same accuracy.
- * <p/>
- * This implementation implements a single counter.  If a large (millions)
- * number of counters are required you may want to refer to:
- * <p/>
- * http://dsiutils.dsi.unimi.it/
- * <p/>
- * It has a more complex implementation of HLL that supports multiple counters
- * in a single object, drastically reducing the java overhead from creating
- * a large number of objects.
- * <p/>
- * This implementation leveraged a javascript implementation that Yammer has
- * been working on:
- * <p/>
- * https://github.com/yammer/probablyjs
- * <p>
- * Note that this implementation does not include the long range correction function
- * defined in the original paper.  Empirical evidence shows that the correction
- * function causes more harm than good.
- * </p>
- *
- * <p>
- * Users have different motivations to use different types of hashing functions.
- * Rather than try to keep up with all available hash functions and to remove
- * the concern of causing future binary incompatibilities this class allows clients
- * to offer the value in hashed int or long form.  This way clients are free
- * to change their hash function on their own time line.  We recommend using Google's
- * Guava Murmur3_128 implementation as it provides good performance and speed when
- * high precision is required.  In our tests the 32bit MurmurHash function included
- * in this project is faster and produces better results than the 32 bit murmur3
- * implementation google provides.
- * </p>
- */
-public class HyperLogLog implements ICardinality
-{
-    private final RegisterSet registerSet;
-    private final int log2m;
-    private final double alphaMM;
-
-
-    /**
-     * Create a new HyperLogLog instance using the specified standard deviation.
-     *
-     * @param rsd - the relative standard deviation for the counter.
-     *            smaller values create counters that require more space.
-     */
-    public HyperLogLog(double rsd)
-    {
-        this(log2m(rsd));
-    }
-
-    private static int log2m(double rsd)
-    {
-        return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
-    }
-
-    /**
-     * Create a new HyperLogLog instance.  The log2m parameter defines the accuracy of
-     * the counter.  The larger the log2m the better the accuracy.
-     * <p/>
-     * accuracy = 1.04/sqrt(2^log2m)
-     *
-     * @param log2m - the number of bits to use as the basis for the HLL instance
-     */
-    public HyperLogLog(int log2m)
-    {
-        this(log2m, new RegisterSet((int) Math.pow(2, log2m)));
-    }
-
-    /**
-     * Creates a new HyperLogLog instance using the given registers.  Used for unmarshalling a serialized
-     * instance and for merging multiple counters together.
-     *
-     * @param registerSet - the initial values for the register set
-     */
-    public HyperLogLog(int log2m, RegisterSet registerSet)
-    {
-        this.registerSet = registerSet;
-        this.log2m = log2m;
-        int m = (int) Math.pow(2, this.log2m);
-
-        // See the paper.
-        switch (log2m)
-        {
-            case 4:
-                alphaMM = 0.673 * m * m;
-                break;
-            case 5:
-                alphaMM = 0.697 * m * m;
-                break;
-            case 6:
-                alphaMM = 0.709 * m * m;
-                break;
-            default:
-                alphaMM = (0.7213 / (1 + 1.079 / m)) * m * m;
-        }
-    }
-
-
-    @Override
-    public boolean offerHashed(long hashedValue)
-    {
-        // j becomes the binary address determined by the first b log2m of x
-        // j will be between 0 and 2^log2m
-        final int j = (int) (hashedValue >>> (Long.SIZE - log2m));
-        final int r = Long.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
-        return registerSet.updateIfGreater(j, r);
-    }
-
-    @Override
-    public boolean offerHashed(int hashedValue)
-    {
-        // j becomes the binary address determined by the first b log2m of x
-        // j will be between 0 and 2^log2m
-        final int j = hashedValue >>> (Integer.SIZE - log2m);
-        final int r = Integer.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
-        return registerSet.updateIfGreater(j, r);
-    }
-
-	@Override
-    public boolean offer(Object o)
-    {
-        final int x = MurmurHash.hash(o);
-        return offerHashed(x);
-    }
-
-
-    @Override
-    public long cardinality()
-    {
-        double registerSum = 0;
-        int count = registerSet.count;
-        double zeros = 0.0;
-        for (int j = 0; j < registerSet.count; j++)
-        {
-            int val = registerSet.get(j);
-            registerSum += 1.0 / (1<<val);
-            if (val == 0) {
-                zeros++;
-            }
-        }
-
-        double estimate = alphaMM * (1 / registerSum);
-
-        if (estimate <= (5.0 / 2.0) * count)
-        {
-            // Small Range Estimate
-            return Math.round(count * Math.log(count / zeros));
-        }
-        else
-        {
-            return Math.round(estimate);
-        }
-    }
-
-    @Override
-    public int sizeof()
-    {
-        return registerSet.size * 4;
-    }
-
-    @Override
-    public byte[] getBytes() throws IOException
-    {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        DataOutputStream dos = new DataOutputStream(baos);
-
-        dos.writeInt(log2m);
-        dos.writeInt(registerSet.size * 4);
-        for (int x : registerSet.bits())
-        {
-            dos.writeInt(x);
-        }
-
-        return baos.toByteArray();
-    }
-    
-    /** Add all the elements of the other set to this set.
-     * 
-     * This operation does not imply a loss of precision.
-     * 
-     * @param other A compatible Hyperloglog instance (same log2m)
-     * @throws CardinalityMergeException if other is not compatible
-     */
-    public void addAll(HyperLogLog other) throws CardinalityMergeException {
-        if (this.sizeof() != other.sizeof())
-        {
-            throw new HyperLogLogMergeException("Cannot merge estimators of different sizes");
-        }
-        
-        registerSet.merge(other.registerSet);
-    }
-
-    @Override
-    public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException
-    {
-        HyperLogLog merged = new HyperLogLog(log2m);
-        merged.addAll(this);
-
-        if (estimators == null)
-        {
-            return merged;
-        }
-        
-        for (ICardinality estimator : estimators)
-        {
-            if (!(estimator instanceof HyperLogLog))
-            {
-                throw new HyperLogLogMergeException("Cannot merge estimators of different class");
-            }
-            HyperLogLog hll = (HyperLogLog) estimator;
-            merged.addAll(hll);
-        }
-        
-        return merged;
-    }
-
-    public static class Builder implements IBuilder<ICardinality>, Serializable
-    {
-        private double rsd;
-
-        public Builder(double rsd)
-        {
-            this.rsd = rsd;
-        }
-
-        @Override
-        public HyperLogLog build()
-        {
-            return new HyperLogLog(rsd);
-        }
-
-        @Override
-        public int sizeof()
-        {
-            int log2m = log2m(rsd);
-            int k = (int) Math.pow(2, log2m);
-            return RegisterSet.getBits(k) * 4;
-        }
-
-        public static HyperLogLog build(byte[] bytes) throws IOException
-        {
-            ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
-            DataInputStream oi = new DataInputStream(bais);
-            int log2m = oi.readInt();
-            int size = oi.readInt();
-            byte[] longArrayBytes = new byte[size];
-            oi.readFully(longArrayBytes);
-            return new HyperLogLog(log2m, new RegisterSet((int) Math.pow(2, log2m), Bits.getBits(longArrayBytes)));
-        }
-    }
-
-    @SuppressWarnings("serial")
-    protected static class HyperLogLogMergeException extends CardinalityMergeException
-    {
-        public HyperLogLogMergeException(String message)
-        {
-            super(message);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java
deleted file mode 100644
index 4321dc6..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/IBuilder.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-
-public interface IBuilder<T>
-{
-    T build();
-
-    int sizeof();
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java
deleted file mode 100644
index ae6d7d6..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/ICardinality.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2011 Clearspring Technologies, Inc. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-import java.io.IOException;
-
-
-public interface ICardinality
-{
-    /**
-     * @param o stream element
-     * @return false if the value returned by cardinality() is unaffected by the appearance of o in the stream.
-     */
-    boolean offer(Object o);
-
-    /**
-     * Offer the value as a hashed long value
-     *
-     * @param hashedLong - the hash of the item to offer to the estimator
-     * @return false if the value returned by cardinality() is unaffected by the appearance of hashedLong in the stream
-     */
-    boolean offerHashed(long hashedLong);
-
-    /**
-     * Offer the value as a hashed long value
-     *
-     * @param hashedInt - the hash of the item to offer to the estimator
-     * @return false if the value returned by cardinality() is unaffected by the appearance of hashedInt in the stream
-     */
-    boolean offerHashed(int hashedInt);
-
-    /**
-     * @return the number of unique elements in the stream or an estimate thereof
-     */
-    long cardinality();
-
-    /**
-     * @return size in bytes needed for serialization
-     */
-    int sizeof();
-
-    /**
-     * @throws IOException
-     */
-    byte[] getBytes() throws IOException;
-
-    /**
-     * Merges estimators to produce a new estimator for the combined streams
-     * of this estimator and those passed as arguments.
-     * 
-     * Nor this estimator nor the one passed as parameters are modified.
-     *
-     * @param estimators Zero or more compatible estimators
-     * @throws CardinalityMergeException If at least one of the estimators is not compatible with this one
-     */
-    ICardinality merge(ICardinality... estimators) throws CardinalityMergeException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java
deleted file mode 100644
index bc760a8..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/MurmurHash.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * This is a very fast, non-cryptographic hash suitable for general hash-based
- * lookup. See http://murmurhash.googlepages.com/ for more details.
- * <p/>
- * <p>
- * The C version of MurmurHash 2.0 found at that site was ported to Java by
- * Andrzej Bialecki (ab at getopt org).
- * </p>
- */
-public class MurmurHash
-{
-    public static int hash(Object o)
-    {
-        if (o == null)
-        {
-            return 0;
-        }
-        if (o instanceof Long)
-        {
-            return hashLong((Long) o);
-        }
-        if (o instanceof Integer)
-        {
-            return hashLong((Integer) o);
-        }
-        if (o instanceof Double)
-        {
-            return hashLong(Double.doubleToRawLongBits((Double) o));
-        }
-        if (o instanceof Float)
-        {
-            return hashLong(Float.floatToRawIntBits((Float) o));
-        }
-        if (o instanceof String)
-        {
-            return hash(((String) o).getBytes());
-        }
-        if (o instanceof byte[])
-        {
-            return hash((byte[]) o);
-        }
-        return hash(o.toString());
-    }
-
-    public static int hash(byte[] data)
-    {
-        return hash(data, 0, data.length, -1);
-    }
-
-    public static int hash(byte[] data, int seed)
-    {
-        return hash(data, 0, data.length, seed);
-    }
-
-    public static int hash(byte[] data, int offset, int length, int seed)
-    {
-        int m = 0x5bd1e995;
-        int r = 24;
-
-        int h = seed ^ length;
-
-        int len_4 = length >> 2;
-
-        for (int i = 0; i < len_4; i++)
-        {
-            int i_4 = i << 2;
-            int k = data[offset + i_4 + 3];
-            k = k << 8;
-            k = k | (data[offset + i_4 + 2] & 0xff);
-            k = k << 8;
-            k = k | (data[offset + i_4 + 1] & 0xff);
-            k = k << 8;
-            k = k | (data[offset + i_4 + 0] & 0xff);
-            k *= m;
-            k ^= k >>> r;
-            k *= m;
-            h *= m;
-            h ^= k;
-        }
-
-        // avoid calculating modulo
-        int len_m = len_4 << 2;
-        int left = length - len_m;
-
-        if (left != 0)
-        {
-            if (left >= 3)
-            {
-                h ^= (int) data[offset + length - 3] << 16;
-            }
-            if (left >= 2)
-            {
-                h ^= (int) data[offset + length - 2] << 8;
-            }
-            if (left >= 1)
-            {
-                h ^= (int) data[offset + length - 1];
-            }
-
-            h *= m;
-        }
-
-        h ^= h >>> 13;
-        h *= m;
-        h ^= h >>> 15;
-
-        return h;
-    }
-
-    public static int hashLong(long data)
-    {
-        int m = 0x5bd1e995;
-        int r = 24;
-
-        int h = 0;
-
-        int k = (int) data * m;
-        k ^= k >>> r;
-        h ^= k * m;
-
-        k = (int) (data >> 32) * m;
-        k ^= k >>> r;
-        h *= m;
-        h ^= k * m;
-
-        h ^= h >>> 13;
-        h *= m;
-        h ^= h >>> 15;
-
-        return h;
-    }
-
-    public static long hash64(Object o)
-    {
-        if (o == null)
-        {
-            return 0l;
-        }
-        else if (o instanceof String)
-        {
-            final byte[] bytes = ((String) o).getBytes();
-            return hash64(bytes, bytes.length);
-        }
-        else if (o instanceof byte[])
-        {
-            final byte[] bytes = (byte[]) o;
-            return hash64(bytes, bytes.length);
-        }
-        return hash64(o.toString());
-    }
-
-    // 64 bit implementation copied from here:  https://github.com/tnm/murmurhash-java
-
-    /**
-     * Generates 64 bit hash from byte array with default seed value.
-     *
-     * @param data   byte array to hash
-     * @param length length of the array to hash
-     * @return 64 bit hash of the given string
-     */
-    public static long hash64(final byte[] data, int length)
-    {
-        return hash64(data, length, 0xe17a1465);
-    }
-
-
-    /**
-     * Generates 64 bit hash from byte array of the given length and seed.
-     *
-     * @param data   byte array to hash
-     * @param length length of the array to hash
-     * @param seed   initial seed value
-     * @return 64 bit hash of the given array
-     */
-    public static long hash64(final byte[] data, int length, int seed)
-    {
-        final long m = 0xc6a4a7935bd1e995L;
-        final int r = 47;
-
-        long h = (seed & 0xffffffffl) ^ (length * m);
-
-        int length8 = length / 8;
-
-        for (int i = 0; i < length8; i++)
-        {
-            final int i8 = i * 8;
-            long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
-                    + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
-                    + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
-                    + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);
-
-            k *= m;
-            k ^= k >>> r;
-            k *= m;
-
-            h ^= k;
-            h *= m;
-        }
-
-        switch (length % 8)
-        {
-            case 7:
-                h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
-            case 6:
-                h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
-            case 5:
-                h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
-            case 4:
-                h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
-            case 3:
-                h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
-            case 2:
-                h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
-            case 1:
-                h ^= (long) (data[length & ~7] & 0xff);
-                h *= m;
-        }
-        ;
-
-        h ^= h >>> r;
-        h *= m;
-        h ^= h >>> r;
-
-        return h;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java
deleted file mode 100644
index 6f1b919..0000000
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/cardinality/RegisterSet.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2012 Clearspring Technologies, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.gemstone.gemfire.cache.hdfs.internal.cardinality;
-
-public class RegisterSet
-{
-    public final static int LOG2_BITS_PER_WORD = 6;
-    public final static int REGISTER_SIZE = 5;
-
-    public final int count;
-    public final int size;
-
-    private final int[] M;
-
-    public RegisterSet(int count)
-    {
-        this(count, null);
-    }
-
-    public RegisterSet(int count, int[] initialValues)
-    {
-        this.count = count;
-        int bits = getBits(count);
-
-        if (initialValues == null)
-        {
-            if (bits == 0)
-            {
-                this.M = new int[1];
-            }
-            else if (bits % Integer.SIZE == 0)
-            {
-                this.M = new int[bits];
-            }
-            else
-            {
-                this.M = new int[bits + 1];
-            }
-        }
-        else
-        {
-            this.M = initialValues;
-        }
-        this.size = this.M.length;
-    }
-
-    public static int getBits(int count)
-    {
-        return count / LOG2_BITS_PER_WORD;
-    }
-
-    public void set(int position, int value)
-    {
-        int bucketPos = position / LOG2_BITS_PER_WORD;
-        int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
-        this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift);
-    }
-
-    public int get(int position)
-    {
-        int bucketPos = position / LOG2_BITS_PER_WORD;
-        int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
-        return (this.M[bucketPos] & (0x1f << shift)) >>> shift;
-    }
-    
-    public boolean updateIfGreater(int position, int value)
-    {
-        int bucket = position / LOG2_BITS_PER_WORD;
-        int shift  = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD));
-        int mask = 0x1f << shift;
-
-        // Use long to avoid sign issues with the left-most shift
-        long curVal = this.M[bucket] & mask;
-        long newVal = value << shift;
-        if (curVal < newVal) {
-            this.M[bucket] = (int)((this.M[bucket] & ~mask) | newVal);
-            return true;
-        } else {
-            return false;
-        }
-    }
-
-    public void merge(RegisterSet that)
-    {
-        for (int bucket = 0; bucket < M.length; bucket++)
-        {
-            int word = 0;
-            for (int j = 0; j < LOG2_BITS_PER_WORD; j++)
-            {
-                int mask = 0x1f << (REGISTER_SIZE * j);
-
-                int thisVal = (this.M[bucket] & mask);
-                int thatVal = (that.M[bucket] & mask);
-                word |= (thisVal < thatVal) ? thatVal : thisVal;
-            }
-            this.M[bucket] = word;
-        }
-    }
-
-    public int[] bits()
-    {
-        int[] copy = new int[size];
-        System.arraycopy(M, 0, copy, 0, M.length);
-        return copy;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
index 636dd91..d2fdbe7 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/AbstractHoplog.java
@@ -19,6 +19,7 @@ package com.gemstone.gemfire.cache.hdfs.internal.hoplog;
 import java.io.IOException;
 import java.util.regex.Matcher;
 
+import com.gemstone.gemfire.internal.hll.ICardinality;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -32,7 +33,6 @@ import org.apache.hadoop.io.compress.SnappyCodec;
 
 import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
 import com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
 import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile;
 import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile.CompressionType;
 import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile.Writer.Option;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
index 8fc9c8e..5ba20d2 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HFileSortedOplog.java
@@ -29,6 +29,8 @@ import java.util.Map.Entry;
 import java.util.NoSuchElementException;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import com.gemstone.gemfire.internal.hll.HyperLogLog;
+import com.gemstone.gemfire.internal.hll.ICardinality;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.ipc.RemoteException;
@@ -37,8 +39,6 @@ import org.apache.hadoop.util.ShutdownHookManager;
 import com.gemstone.gemfire.cache.CacheClosedException;
 import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
 import com.gemstone.gemfire.cache.hdfs.internal.HDFSStoreImpl;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.HyperLogLog;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
 import com.gemstone.gemfire.internal.cache.persistence.soplog.DelegatingSerializedComparator;
 import com.gemstone.gemfire.internal.cache.persistence.soplog.HFileStoreStatistics;
 import com.gemstone.gemfire.internal.cache.persistence.soplog.SortedOplogStatistics;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
index 9890b3b..61b925b 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/HdfsSortedOplogOrganizer.java
@@ -40,6 +40,10 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.gemstone.gemfire.internal.hll.CardinalityMergeException;
+import com.gemstone.gemfire.internal.hll.HyperLogLog;
+import com.gemstone.gemfire.internal.hll.ICardinality;
+import com.gemstone.gemfire.internal.hll.MurmurHash;
 import org.apache.commons.lang.NotImplementedException;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -54,10 +58,6 @@ import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
 import com.gemstone.gemfire.cache.hdfs.HDFSStore;
 import com.gemstone.gemfire.cache.hdfs.internal.QueuedPersistentEvent;
 import com.gemstone.gemfire.cache.hdfs.internal.SortedHoplogPersistedEvent;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.CardinalityMergeException;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.HyperLogLog;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.MurmurHash;
 import com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSCompactionManager.CompactionRequest;
 import com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSRegionDirector.HdfsRegionManager;
 import com.gemstone.gemfire.cache.hdfs.internal.hoplog.Hoplog.HoplogReader;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
index 113e49b..a5030ae 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/Hoplog.java
@@ -16,12 +16,13 @@
  */
 package com.gemstone.gemfire.cache.hdfs.internal.hoplog;
 
+import com.gemstone.gemfire.internal.hll.ICardinality;
+
 import java.io.Closeable;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.EnumMap;
 
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
 
 /**
  * Ordered sequence file

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
index 04cbb05..fbb8f14 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/cache/hdfs/internal/hoplog/SequenceFileHoplog.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.EnumMap;
 
+import com.gemstone.gemfire.internal.hll.ICardinality;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -32,7 +33,6 @@ import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Text;
 
 import com.gemstone.gemfire.cache.hdfs.HDFSIOException;
-import com.gemstone.gemfire.cache.hdfs.internal.cardinality.ICardinality;
 import com.gemstone.gemfire.cache.hdfs.internal.hoplog.HoplogSetReader.HoplogIterator;
 import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile;
 import com.gemstone.gemfire.cache.hdfs.internal.org.apache.hadoop.io.SequenceFile.Reader;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java
new file mode 100755
index 0000000..bd0b33c
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/Bits.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2011 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+
+public class Bits {
+
+    public static int[] getBits(byte[] mBytes) throws IOException {
+        int bitSize = mBytes.length / 4;
+        int[] bits = new int[bitSize];
+        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(mBytes));
+        for (int i = 0; i < bitSize; i++) {
+            bits[i] = dis.readInt();
+        }
+        return bits;
+    }
+
+    /**
+     * This method might be better described as
+     * "byte array to int array" or "data input to int array"
+     */
+    public static int[] getBits(DataInput dataIn, int byteLength) throws IOException {
+        int bitSize = byteLength / 4;
+        int[] bits = new int[bitSize];
+        for (int i = 0; i < bitSize; i++) {
+            bits[i] = dataIn.readInt();
+        }
+        return bits;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java
new file mode 100755
index 0000000..5fd6b0b
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/CardinalityMergeException.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2011 Clearspring Technologies, Inc. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+@SuppressWarnings("serial")
+public abstract class CardinalityMergeException extends Exception {
+
+    public CardinalityMergeException(String message) {
+        super(message);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/e685fd85/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java
new file mode 100755
index 0000000..fa5508d
--- /dev/null
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/internal/hll/HyperLogLog.java
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.gemstone.gemfire.internal.hll;
+
+/*
+ * Copyright (C) 2012 Clearspring Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.gemstone.gemfire.internal.redis.executor.hll.HllExecutor;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.io.Serializable;
+
+/**
+ * Java implementation of HyperLogLog (HLL) algorithm from this paper:
+ * <p/>
+ * http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
+ * <p/>
+ * HLL is an improved version of LogLog that is capable of estimating
+ * the cardinality of a set with accuracy = 1.04/sqrt(m) where
+ * m = 2^b.  So we can control accuracy vs space usage by increasing
+ * or decreasing b.
+ * <p/>
+ * The main benefit of using HLL over LL is that it only requires 64%
+ * of the space that LL does to get the same accuracy.
+ * <p/>
+ * This implementation implements a single counter.  If a large (millions)
+ * number of counters are required you may want to refer to:
+ * <p/>
+ * http://dsiutils.dsi.unimi.it/
+ * <p/>
+ * It has a more complex implementation of HLL that supports multiple counters
+ * in a single object, drastically reducing the java overhead from creating
+ * a large number of objects.
+ * <p/>
+ * This implementation leveraged a javascript implementation that Yammer has
+ * been working on:
+ * <p/>
+ * https://github.com/yammer/probablyjs
+ * <p>
+ * Note that this implementation does not include the long range correction function
+ * defined in the original paper.  Empirical evidence shows that the correction
+ * function causes more harm than good.
+ * </p>
+ * <p/>
+ * <p>
+ * Users have different motivations to use different types of hashing functions.
+ * Rather than try to keep up with all available hash functions and to remove
+ * the concern of causing future binary incompatibilities this class allows clients
+ * to offer the value in hashed int or long form.  This way clients are free
+ * to change their hash function on their own time line.  We recommend using Google's
+ * Guava Murmur3_128 implementation as it provides good performance and speed when
+ * high precision is required.  In our tests the 32bit MurmurHash function included
+ * in this project is faster and produces better results than the 32 bit murmur3
+ * implementation google provides.
+ * </p>
+ */
+public class HyperLogLog implements ICardinality, Serializable {
+
+  private static final long serialVersionUID = -4661220245111112301L;
+  private final RegisterSet registerSet;
+  private final int log2m;
+  private final double alphaMM;
+
+
+  /**
+   * Create a new HyperLogLog instance using the specified standard deviation.
+   *
+   * @param rsd - the relative standard deviation for the counter.
+   *            smaller values create counters that require more space.
+   */
+  public HyperLogLog(double rsd) {
+    this(log2m(rsd));
+  }
+
+  private static int log2m(double rsd) {
+    return (int) (Math.log((1.106 / rsd) * (1.106 / rsd)) / Math.log(2));
+  }
+
+  /**
+   * Create a new HyperLogLog instance.  The log2m parameter defines the accuracy of
+   * the counter.  The larger the log2m the better the accuracy.
+   * <p/>
+   * accuracy = 1.04/sqrt(2^log2m)
+   *
+   * @param log2m - the number of bits to use as the basis for the HLL instance
+   */
+  public HyperLogLog(int log2m) {
+    this(log2m, new RegisterSet(1 << log2m));
+  }
+
+  /**
+   * Creates a new HyperLogLog instance using the given registers.  Used for unmarshalling a serialized
+   * instance and for merging multiple counters together.
+   *
+   * @param registerSet - the initial values for the register set
+   */
+  @Deprecated
+  public HyperLogLog(int log2m, RegisterSet registerSet) {
+    if (log2m < 0 || log2m > 30) {
+      throw new IllegalArgumentException("log2m argument is "
+          + log2m + " and is outside the range [0, 30]");
+    }
+    this.registerSet = registerSet;
+    this.log2m = log2m;
+    int m = 1 << this.log2m;
+
+    alphaMM = getAlphaMM(log2m, m);
+  }
+
+  @Override
+  public boolean offerHashed(long hashedValue) {
+    // j becomes the binary address determined by the first b log2m of x
+    // j will be between 0 and 2^log2m
+    final int j = (int) (hashedValue >>> (Long.SIZE - log2m));
+    final int r = Long.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
+    return registerSet.updateIfGreater(j, r);
+  }
+
+  @Override
+  public boolean offerHashed(int hashedValue) {
+    // j becomes the binary address determined by the first b log2m of x
+    // j will be between 0 and 2^log2m
+    final int j = hashedValue >>> (Integer.SIZE - log2m);
+    final int r = Integer.numberOfLeadingZeros((hashedValue << this.log2m) | (1 << (this.log2m - 1)) + 1) + 1;
+    return registerSet.updateIfGreater(j, r);
+  }
+
+  @Override
+  public boolean offer(Object o) {
+    final int x = MurmurHash.hash(o);
+    return offerHashed(x);
+  }
+
+
+  @Override
+  public long cardinality() {
+    double registerSum = 0;
+    int count = registerSet.count;
+    double zeros = 0.0;
+    for (int j = 0; j < registerSet.count; j++) {
+      int val = registerSet.get(j);
+      registerSum += 1.0 / (1 << val);
+      if (val == 0) {
+        zeros++;
+      }
+    }
+
+    double estimate = alphaMM * (1 / registerSum);
+
+    if (estimate <= (5.0 / 2.0) * count) {
+      // Small Range Estimate
+      return Math.round(linearCounting(count, zeros));
+    } else {
+      return Math.round(estimate);
+    }
+  }
+
+  @Override
+  public int sizeof() {
+    return registerSet.size * 4;
+  }
+
+  @Override
+  public byte[] getBytes() throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutput dos = new DataOutputStream(baos);
+    writeBytes(dos);
+
+    return baos.toByteArray();
+  }
+
+  private void writeBytes(DataOutput serializedByteStream) throws IOException {
+    serializedByteStream.writeInt(log2m);
+    serializedByteStream.writeInt(registerSet.size * 4);
+    for (int x : registerSet.readOnlyBits()) {
+      serializedByteStream.writeInt(x);
+    }
+  }
+
+  /**
+   * Add all the elements of the other set to this set.
+   * <p/>
+   * This operation does not imply a loss of precision.
+   *
+   * @param other A compatible Hyperloglog instance (same log2m)
+   * @throws CardinalityMergeException if other is not compatible
+   */
+  public void addAll(HyperLogLog other) throws CardinalityMergeException {
+    if (this.sizeof() != other.sizeof()) {
+      throw new HyperLogLogMergeException("Cannot merge estimators of different sizes");
+    }
+
+    registerSet.merge(other.registerSet);
+  }
+
+  @Override
+  public ICardinality merge(ICardinality... estimators) throws CardinalityMergeException {
+    HyperLogLog merged = new HyperLogLog(HllExecutor.DEFAULT_HLL_STD_DEV);//new HyperLogLog(log2m, new RegisterSet(this.registerSet.count));
+    merged.addAll(this);
+
+    if (estimators == null) {
+      return merged;
+    }
+
+    for (ICardinality estimator : estimators) {
+      if (!(estimator instanceof HyperLogLog)) {
+        throw new HyperLogLogMergeException("Cannot merge estimators of different class");
+      }
+      HyperLogLog hll = (HyperLogLog) estimator;
+      merged.addAll(hll);
+    }
+
+    return merged;
+  }
+
+  private Object writeReplace() {
+    return new SerializationHolder(this);
+  }
+
+  /**
+   * This class exists to support Externalizable semantics for
+   * HyperLogLog objects without having to expose a public
+   * constructor, public write/read methods, or pretend final
+   * fields aren't final.
+   *
+   * In short, Externalizable allows you to skip some of the more
+   * verbose meta-data default Serializable gets you, but still
+   * includes the class name. In that sense, there is some cost
+   * to this holder object because it has a longer class name. I
+   * imagine people who care about optimizing for that have their
+   * own work-around for long class names in general, or just use
+   * a custom serialization framework. Therefore we make no attempt
+   * to optimize that here (eg. by raising this from an inner class
+   * and giving it an unhelpful name).
+   */
+  private static class SerializationHolder implements Externalizable {
+
+    HyperLogLog hyperLogLogHolder;
+
+    public SerializationHolder(HyperLogLog hyperLogLogHolder) {
+      this.hyperLogLogHolder = hyperLogLogHolder;
+    }
+
+    /**
+     * required for Externalizable
+     */
+    @SuppressWarnings("unused")
+    public SerializationHolder() {
+
+    }
+
+    @Override
+    public void writeExternal(ObjectOutput out) throws IOException {
+      hyperLogLogHolder.writeBytes(out);
+    }
+
+    @Override
+    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+      hyperLogLogHolder = Builder.build(in);
+    }
+
+    private Object readResolve() {
+      return hyperLogLogHolder;
+    }
+  }
+
+  public static class Builder implements IBuilder<ICardinality>, Serializable {
+
+    private static final long serialVersionUID = -979314356097156719L;
+    private double rsd;
+
+    public Builder(double rsd) {
+      this.rsd = rsd;
+    }
+
+    @Override
+    public HyperLogLog build() {
+      return new HyperLogLog(rsd);
+    }
+
+    @Override
+    public int sizeof() {
+      int log2m = log2m(rsd);
+      int k = 1 << log2m;
+      return RegisterSet.getBits(k) * 4;
+    }
+
+    public static HyperLogLog build(byte[] bytes) throws IOException {
+      ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+      return build(new DataInputStream(bais));
+    }
+
+    public static HyperLogLog build(DataInput serializedByteStream) throws IOException {
+      int log2m = serializedByteStream.readInt();
+      int byteArraySize = serializedByteStream.readInt();
+      return new HyperLogLog(log2m,
+          new RegisterSet(1 << log2m, Bits.getBits(serializedByteStream, byteArraySize)));
+    }
+  }
+
+  @SuppressWarnings("serial")
+  protected static class HyperLogLogMergeException extends CardinalityMergeException {
+
+    public HyperLogLogMergeException(String message) {
+      super(message);
+    }
+  }
+
+  protected static double getAlphaMM(final int p, final int m) {
+    // See the paper.
+    switch (p) {
+    case 4:
+      return 0.673 * m * m;
+    case 5:
+      return 0.697 * m * m;
+    case 6:
+      return 0.709 * m * m;
+    default:
+      return (0.7213 / (1 + 1.079 / m)) * m * m;
+    }
+  }
+
+  protected static double linearCounting(int m, double V) {
+    return m * Math.log(m / V);
+  }
+}


Mime
View raw message