asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From il...@apache.org
Subject [4/8] incubator-asterixdb-hyracks git commit: First version of LSM-based statistics collection
Date Sat, 05 Dec 2015 00:50:17 GMT
First version of LSM-based statistics collection


Project: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/commit/cf029cf3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/tree/cf029cf3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/diff/cf029cf3

Branch: refs/heads/statistics
Commit: cf029cf34532b3d460177ea33df30d712d79c44a
Parents: 5ae0df8
Author: Ildar Absalyamov <ildar.absalyamov@gmail.com>
Authored: Sun Oct 25 21:15:48 2015 +0500
Committer: Ildar Absalyamov <ildar.absalyamov@gmail.com>
Committed: Sun Oct 25 21:15:48 2015 +0500

----------------------------------------------------------------------
 .../org/apache/hyracks/data/std/api/IMath.java  |  26 ++
 .../data/std/primitive/IntegerPointable.java    |  69 ++++-
 .../lsm/btree/LSMBTreeOperatorTestHelper.java   |   3 +-
 .../am/bloomfilter/impls/BloomFilter.java       |  88 ++-----
 .../bloomfilter/impls/BloomFilterFactory.java   |   2 +-
 .../api/IPrimitiveIntegerValueProvider.java     |  30 +++
 .../IPrimitiveIntegerValueProviderFactory.java  |  26 ++
 .../am/common/api/IPrimitiveValueProvider.java  |   2 +-
 ...bleIntegerPrimitiveValueProviderFactory.java |  52 ++++
 .../am/common/impls/AbstractFileManager.java    | 102 +++++++
 .../am/common/impls/AbstractTreeIndex.java      | 164 ++++--------
 .../am/common/statistics/StatisticsFactory.java |  31 +++
 .../storage/am/common/statistics/Synopsis.java  |  22 ++
 .../am/common/statistics/SynopsisType.java      |   7 +
 .../statistics/sketch/GroupCountSketch.java     |  90 +++++++
 .../common/statistics/sketch/HashGenerator.java |  33 +++
 .../common/statistics/sketch/QuickSelect.java   |  47 ++++
 .../am/common/statistics/sketch/Sketch.java     |  24 ++
 .../statistics/sketch/SketchSynopsis.java       |  83 ++++++
 .../statistics/wavelet/WaveletCoefficient.java  |  83 ++++++
 .../statistics/wavelet/WaveletSynopsis.java     | 263 +++++++++++++++++++
 .../AbstractLSMBTreeDataflowHelper.java         |  34 +++
 .../AbstractLSMBTreeDataflowHelperFactory.java  |  35 +++
 .../dataflow/ExternalBTreeDataflowHelper.java   |  22 +-
 .../ExternalBTreeDataflowHelperFactory.java     |  13 +-
 .../ExternalBTreeWithBuddyDataflowHelper.java   |  35 +--
 ...rnalBTreeWithBuddyDataflowHelperFactory.java |  13 +-
 .../btree/dataflow/LSMBTreeDataflowHelper.java  |  20 +-
 .../dataflow/LSMBTreeDataflowHelperFactory.java |  11 +-
 .../am/lsm/btree/impls/ExternalBTree.java       | 107 +++++---
 .../lsm/btree/impls/ExternalBTreeWithBuddy.java |  44 ++--
 .../storage/am/lsm/btree/impls/LSMBTree.java    | 179 ++++++++-----
 .../lsm/btree/impls/LSMBTreeDiskComponent.java  |  17 +-
 .../impls/LSMBTreeDiskComponentFactory.java     |  14 +-
 .../am/lsm/btree/impls/LSMBTreeFileManager.java |  22 +-
 .../lsm/btree/impls/LSMBTreeFlushOperation.java |  67 +----
 .../lsm/btree/impls/LSMBTreeMergeOperation.java |  63 +----
 .../btree/impls/LSMBTreePointSearchCursor.java  |   3 +-
 .../impls/LSMBTreeWithBuddyDiskComponent.java   |  14 +-
 .../LSMBTreeWithBuddyDiskComponentFactory.java  |   2 +-
 .../impls/LSMBTreeWithBuddyFileManager.java     |  27 +-
 .../impls/LSMBTreeWithBuddyMergeOperation.java  | 143 +++-------
 .../am/lsm/btree/util/LSMBTreeUtils.java        |  32 ++-
 .../common/impls/AbstractDiskLSMComponent.java  |  21 +-
 .../common/impls/AbstractLSMFlushOperation.java |  53 ++++
 .../impls/AbstractLSMIndexFileManager.java      |  20 +-
 .../common/impls/AbstractLSMMergeOperation.java |  58 ++++
 .../lsm/common/impls/AbstractLSMOperation.java  |  52 ++++
 ...ytePrimitiveIntegerValueProviderFactory.java |  57 ++++
 ...gerPrimitiveIntegerValueProviderFactory.java |  57 ++++
 .../impls/LSMComponentFileReferences.java       |   9 +-
 ...ongPrimitiveIntegerValueProviderFactory.java |  57 ++++
 ...ortPrimitiveIntegerValueProviderFactory.java |  57 ++++
 .../invertedindex/impls/LSMInvertedIndex.java   |  71 ++---
 .../impls/LSMInvertedIndexDiskComponent.java    |  16 +-
 .../LSMInvertedIndexDiskComponentFactory.java   |   2 +-
 .../impls/LSMInvertedIndexFileManager.java      |  20 +-
 .../impls/LSMInvertedIndexFlushOperation.java   |  58 +---
 .../impls/LSMInvertedIndexMergeOperation.java   |  62 +----
 .../am/lsm/rtree/impls/AbstractLSMRTree.java    |  23 +-
 .../storage/am/lsm/rtree/impls/LSMRTree.java    |   2 +-
 .../lsm/rtree/impls/LSMRTreeDiskComponent.java  |  13 +-
 .../impls/LSMRTreeDiskComponentFactory.java     |   2 +-
 .../am/lsm/rtree/impls/LSMRTreeFileManager.java |  24 +-
 .../lsm/rtree/impls/LSMRTreeFlushOperation.java |  54 +---
 .../am/lsm/btree/LSMBTreeExamplesTest.java      |  12 +-
 ...MBTreeModificationOperationCallbackTest.java |   5 +-
 .../LSMBTreeSearchOperationCallbackTest.java    |   7 +-
 .../multithread/LSMBTreeMultiThreadTest.java    |  30 +--
 .../am/lsm/btree/perf/LSMTreeRunner.java        |   2 +-
 .../am/lsm/btree/util/LSMBTreeTestContext.java  |   7 +-
 71 files changed, 2096 insertions(+), 919 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/IMath.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/IMath.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/IMath.java
new file mode 100644
index 0000000..b36f7e9
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/IMath.java
@@ -0,0 +1,26 @@
+package org.apache.hyracks.data.std.api;
+
+public interface IMath<T> {
+
+    public T and(Number mask);
+
+    public T shiftRight(Number positions);
+
+    public T shiftLeft(Number positions);
+
+    public T add(Number summand);
+
+    public T add(T pointableSummand);
+
+    public T sub(Number subtrahend);
+
+    public T sub(T pointableSubtrahend);
+
+    public T div(Number subtrahend);
+
+    public T div(T pointableSubtrahend);
+
+    public T mult(Number subtrahend);
+
+    public T mult(T pointableSubtrahend);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/IntegerPointable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/IntegerPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/IntegerPointable.java
index ee0d72e..4ebab0d 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/IntegerPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/IntegerPointable.java
@@ -26,7 +26,8 @@ import org.apache.hyracks.data.std.api.INumeric;
 import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.api.IPointableFactory;
 
-public final class IntegerPointable extends AbstractPointable implements IHashable, IComparable, INumeric {
+public final class IntegerPointable extends AbstractPointable
+        implements IHashable, IComparable, INumeric/*, IMath<IntegerPointable>*/ {
     public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
         private static final long serialVersionUID = 1L;
 
@@ -135,4 +136,70 @@ public final class IntegerPointable extends AbstractPointable implements IHashab
     public double doubleValue() {
         return getInteger();
     }
+
+    //    @Override
+    //    public IntegerPointable shiftRight(Number positions) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable shiftLeft(Number positions) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable add(Number summand) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable add(IntegerPointable pointableSummand) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable sub(Number subtrahend) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable sub(IntegerPointable pointableSubtrahend) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable and(Number mask) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable div(Number subtrahend) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable div(IntegerPointable pointableSubtrahend) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable mult(Number subtrahend) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
+    //
+    //    @Override
+    //    public IntegerPointable mult(IntegerPointable pointableSubtrahend) {
+    //        // TODO Auto-generated method stub
+    //        return null;
+    //    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
index 80f4d34..cbee47e 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
@@ -34,6 +34,7 @@ import org.apache.hyracks.tests.am.common.LSMTreeOperatorTestHelper;
 public class LSMBTreeOperatorTestHelper extends LSMTreeOperatorTestHelper {
 
     private static final Map<String, String> MERGE_POLICY_PROPERTIES;
+
     static {
         MERGE_POLICY_PROPERTIES = new HashMap<String, String>();
         MERGE_POLICY_PROPERTIES.put("num-components", "3");
@@ -47,7 +48,7 @@ public class LSMBTreeOperatorTestHelper extends LSMTreeOperatorTestHelper {
         return new LSMBTreeDataflowHelperFactory(virtualBufferCacheProvider, new ConstantMergePolicyFactory(),
                 MERGE_POLICY_PROPERTIES, ThreadCountingOperationTrackerProvider.INSTANCE,
                 SynchronousSchedulerProvider.INSTANCE, NoOpIOOperationCallback.INSTANCE,
-                DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, true, null, null, null, null, true);
+                DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, true, null, null, null, null, true, false, null);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
index b62e483..f8db571 100644
--- a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
+++ b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
@@ -26,12 +26,13 @@ import org.apache.hyracks.api.io.FileReference;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 import org.apache.hyracks.storage.am.common.api.IIndexBulkLoader;
 import org.apache.hyracks.storage.am.common.api.IndexException;
+import org.apache.hyracks.storage.am.common.impls.AbstractFileManager;
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.file.BufferedFileHandle;
 import org.apache.hyracks.storage.common.file.IFileMapProvider;
 
-public class BloomFilter {
+public class BloomFilter extends AbstractFileManager {
 
     private final static int METADATA_PAGE_ID = 0;
     private final static int NUM_PAGES_OFFSET = 0; // 0
@@ -39,12 +40,7 @@ public class BloomFilter {
     private final static int NUM_ELEMENTS_OFFSET = NUM_HASHES_USED_OFFSET + 4; // 8
     private final static int NUM_BITS_OFFSET = NUM_ELEMENTS_OFFSET + 8; // 12
 
-    private final IBufferCache bufferCache;
-    private final IFileMapProvider fileMapProvider;
-    private final FileReference file;
     private final int[] keyFields;
-    private int fileId = -1;
-    private boolean isActivated = false;
 
     private int numPages;
     private int numHashes;
@@ -56,21 +52,11 @@ public class BloomFilter {
 
     public BloomFilter(IBufferCache bufferCache, IFileMapProvider fileMapProvider, FileReference file, int[] keyFields)
             throws HyracksDataException {
-        this.bufferCache = bufferCache;
-        this.fileMapProvider = fileMapProvider;
-        this.file = file;
+        super(bufferCache, fileMapProvider, file);
         this.keyFields = keyFields;
         this.numBitsPerPage = bufferCache.getPageSize() * Byte.SIZE;
     }
 
-    public int getFileId() {
-        return fileId;
-    }
-
-    public FileReference getFileReference() {
-        return file;
-    }
-
     public int getNumPages() throws HyracksDataException {
         if (!isActivated) {
             throw new HyracksDataException("The bloom filter is not activated.");
@@ -94,8 +80,8 @@ public class BloomFilter {
             long hash = Math.abs((hashes[0] + i * hashes[1]) % numBits);
 
             // we increment the page id by one, since the metadata page id of the filter is 0.
-            ICachedPage page = bufferCache.pin(
-                    BufferedFileHandle.getDiskPageId(fileId, (int) (hash / numBitsPerPage) + 1), false);
+            ICachedPage page = bufferCache
+                    .pin(BufferedFileHandle.getDiskPageId(fileId, (int) (hash / numBitsPerPage) + 1), false);
             page.acquireReadLatch();
             try {
                 ByteBuffer buffer = page.getBuffer();
@@ -115,32 +101,15 @@ public class BloomFilter {
         return true;
     }
 
-    private void prepareFile() throws HyracksDataException {
-        boolean fileIsMapped = false;
-        synchronized (fileMapProvider) {
-            fileIsMapped = fileMapProvider.isMapped(file);
-            if (!fileIsMapped) {
-                bufferCache.createFile(file);
-            }
-            fileId = fileMapProvider.lookupFileId(file);
-            try {
-                // Also creates the file if it doesn't exist yet.
-                bufferCache.openFile(fileId);
-            } catch (HyracksDataException e) {
-                // Revert state of buffer cache since file failed to open.
-                if (!fileIsMapped) {
-                    bufferCache.deleteFile(fileId, false);
-                }
-                throw e;
-            }
-        }
+    @Override
+    public synchronized void create() throws HyracksDataException {
+        super.create();
+
+        initBloomFilterMetaData();
+        bufferCache.closeFile(fileId);
     }
 
-    public synchronized void create() throws HyracksDataException {
-        if (isActivated) {
-            throw new HyracksDataException("Failed to create the bloom filter since it is activated.");
-        }
-        prepareFile();
+    private void initBloomFilterMetaData() throws HyracksDataException {
         ICachedPage metaPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, METADATA_PAGE_ID), true);
         metaPage.acquireWriteLatch();
         try {
@@ -152,17 +121,13 @@ public class BloomFilter {
             metaPage.releaseWriteLatch(true);
             bufferCache.unpin(metaPage);
         }
-        bufferCache.closeFile(fileId);
     }
 
+    @Override
     public synchronized void activate() throws HyracksDataException {
-        if (isActivated) {
-            return;
-        }
+        super.activate();
 
-        prepareFile();
         readBloomFilterMetaData();
-        isActivated = true;
     }
 
     private void readBloomFilterMetaData() throws HyracksDataException {
@@ -179,27 +144,6 @@ public class BloomFilter {
         }
     }
 
-    public synchronized void deactivate() throws HyracksDataException {
-        if (!isActivated) {
-            return;
-        }
-        bufferCache.closeFile(fileId);
-        isActivated = false;
-    }
-
-    public synchronized void destroy() throws HyracksDataException {
-        if (isActivated) {
-            throw new HyracksDataException("Failed to destroy the bloom filter since it is activated.");
-        }
-
-        file.delete();
-        if (fileId == -1) {
-            return;
-        }
-        bufferCache.deleteFile(fileId, false);
-        fileId = -1;
-    }
-
     public IIndexBulkLoader createBuilder(long numElements, int numHashes, int numBitsPerElement)
             throws HyracksDataException {
         return new BloomFilterBuilder(numElements, numHashes, numBitsPerElement);
@@ -279,8 +223,8 @@ public class BloomFilter {
                 long hash = Math.abs((hashes[0] + i * hashes[1]) % numBits);
 
                 // we increment the page id by one, since the metadata page id of the filter is 0.
-                ICachedPage page = bufferCache.pin(
-                        BufferedFileHandle.getDiskPageId(fileId, (int) (hash / numBitsPerPage) + 1), false);
+                ICachedPage page = bufferCache
+                        .pin(BufferedFileHandle.getDiskPageId(fileId, (int) (hash / numBitsPerPage) + 1), false);
                 page.acquireWriteLatch();
                 try {
                     ByteBuffer buffer = page.getBuffer();

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
index c3a1718..aa9a8b3 100644
--- a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
+++ b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
@@ -35,7 +35,7 @@ public class BloomFilterFactory {
         this.bloomFilterKeyFields = bloomFilterKeyFields;
     }
 
-    public BloomFilter createBloomFiltertInstance(FileReference file) throws HyracksDataException {
+    public BloomFilter createBloomFilterInstance(FileReference file) throws HyracksDataException {
         return new BloomFilter(bufferCache, fileMapProvider, file, bloomFilterKeyFields);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProvider.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProvider.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProvider.java
new file mode 100644
index 0000000..e5bd178
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProvider.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.api;
+
+public interface IPrimitiveIntegerValueProvider {
+    public long getValue(byte[] bytes, int offset);
+
+    public long minDomainValue(byte[] bytes, int offset);
+
+    public long maxDomainValue(byte[] bytes, int offset);
+
+    public int maxLevel(byte[] bytes, int offset);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProviderFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProviderFactory.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProviderFactory.java
new file mode 100644
index 0000000..bbde1eb
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveIntegerValueProviderFactory.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.api;
+
+import java.io.Serializable;
+
+public interface IPrimitiveIntegerValueProviderFactory extends Serializable {
+    public IPrimitiveIntegerValueProvider createPrimitiveIntegerValueProvider();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveValueProvider.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveValueProvider.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveValueProvider.java
index fd9a0bc..d9ab69f 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveValueProvider.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IPrimitiveValueProvider.java
@@ -20,5 +20,5 @@
 package org.apache.hyracks.storage.am.common.api;
 
 public interface IPrimitiveValueProvider {
-	public double getValue(byte[] bytes, int offset);
+    public double getValue(byte[] bytes, int offset);
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/data/PointableIntegerPrimitiveValueProviderFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/data/PointableIntegerPrimitiveValueProviderFactory.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/data/PointableIntegerPrimitiveValueProviderFactory.java
new file mode 100644
index 0000000..8b211f8
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/data/PointableIntegerPrimitiveValueProviderFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.storage.am.common.data;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.data.std.api.INumeric;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveValueProvider;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
+
+public class PointableIntegerPrimitiveValueProviderFactory implements IPrimitiveValueProviderFactory {
+    private static final long serialVersionUID = 1L;
+
+    private final IPointableFactory pf;
+    private final IPointable p;
+    private final ITypeTraits traits;
+
+    public PointableIntegerPrimitiveValueProviderFactory(IPointableFactory pf) {
+        this.pf = pf;
+        p = pf.createPointable();
+        traits = pf.getTypeTraits();
+    }
+
+    @Override
+    public IPrimitiveValueProvider createPrimitiveValueProvider() {
+        final int length = traits.getFixedLength();
+        return new IPrimitiveValueProvider() {
+            @Override
+            public double getValue(byte[] bytes, int offset) {
+                p.set(bytes, offset, length);
+                return ((INumeric) p).doubleValue();
+            }
+        };
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractFileManager.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractFileManager.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractFileManager.java
new file mode 100644
index 0000000..baceb77
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractFileManager.java
@@ -0,0 +1,102 @@
+package org.apache.hyracks.storage.am.common.impls;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.file.IFileMapProvider;
+
+public abstract class AbstractFileManager {
+
+    protected final IBufferCache bufferCache;
+    protected final IFileMapProvider fileMapProvider;
+    protected FileReference file;
+    protected int fileId = -1;
+    protected boolean isActivated = false;
+
+    public AbstractFileManager(IBufferCache bufferCache, IFileMapProvider fileMapProvider, FileReference file) {
+        this.bufferCache = bufferCache;
+        this.fileMapProvider = fileMapProvider;
+        this.file = file;
+    }
+
+    public synchronized void create() throws HyracksDataException {
+        if (isActivated) {
+            throw new HyracksDataException("Failed to create " + toString() + " since it is activated.");
+        }
+
+        prepareFile();
+    }
+
+    private void prepareFile() throws HyracksDataException {
+        boolean fileIsMapped = false;
+        synchronized (fileMapProvider) {
+            fileIsMapped = fileMapProvider.isMapped(file);
+            if (!fileIsMapped) {
+                bufferCache.createFile(file);
+            }
+            fileId = fileMapProvider.lookupFileId(file);
+            try {
+                // Also creates the file if it doesn't exist yet.
+                bufferCache.openFile(fileId);
+            } catch (HyracksDataException e) {
+                // Revert state of buffer cache since file failed to open.
+                if (!fileIsMapped) {
+                    bufferCache.deleteFile(fileId, false);
+                }
+                throw e;
+            }
+        }
+    }
+
+    public synchronized void activate() throws HyracksDataException {
+        if (isActivated) {
+            throw new HyracksDataException("Failed to activate " + toString() + " since it is already activated.");
+        }
+
+        prepareFile();
+
+        isActivated = true;
+    }
+
+    public synchronized void deactivate() throws HyracksDataException {
+        if (!isActivated) {
+            throw new HyracksDataException("Failed to deactivate " + toString() + " since it is already deactivated.");
+        }
+
+        bufferCache.closeFile(fileId);
+
+        isActivated = false;
+    }
+
+    public synchronized void destroy() throws HyracksDataException {
+        if (isActivated) {
+            throw new HyracksDataException("Failed to destroy " + toString() + " since it is activated.");
+        }
+
+        if (fileId == -1) {
+            return;
+        }
+        bufferCache.deleteFile(fileId, false);
+        file.delete();
+        fileId = -1;
+    }
+
+    public synchronized void clear() throws HyracksDataException {
+        if (!isActivated) {
+            throw new HyracksDataException("Failed to clear the index since it is not activated.");
+        }
+    }
+
+    public int getFileId() {
+        return fileId;
+    }
+
+    public FileReference getFileReference() {
+        return file;
+    }
+
+    public IBufferCache getBufferCache() {
+        return bufferCache;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
index e7b535f..296ba6a 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
@@ -41,12 +41,10 @@ import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.file.BufferedFileHandle;
 import org.apache.hyracks.storage.common.file.IFileMapProvider;
 
-public abstract class AbstractTreeIndex implements ITreeIndex {
+public abstract class AbstractTreeIndex extends AbstractFileManager implements ITreeIndex {
 
     protected final static int rootPage = 1;
 
-    protected final IBufferCache bufferCache;
-    protected final IFileMapProvider fileMapProvider;
     protected final IFreePageManager freePageManager;
 
     protected final ITreeIndexFrameFactory interiorFrameFactory;
@@ -55,56 +53,19 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
     protected final IBinaryComparatorFactory[] cmpFactories;
     protected final int fieldCount;
 
-    protected FileReference file;
-    protected int fileId = -1;
-
-    private boolean isActivated = false;
-
     public AbstractTreeIndex(IBufferCache bufferCache, IFileMapProvider fileMapProvider,
             IFreePageManager freePageManager, ITreeIndexFrameFactory interiorFrameFactory,
             ITreeIndexFrameFactory leafFrameFactory, IBinaryComparatorFactory[] cmpFactories, int fieldCount,
             FileReference file) {
-        this.bufferCache = bufferCache;
-        this.fileMapProvider = fileMapProvider;
+        super(bufferCache, fileMapProvider, file);
         this.freePageManager = freePageManager;
         this.interiorFrameFactory = interiorFrameFactory;
         this.leafFrameFactory = leafFrameFactory;
         this.cmpFactories = cmpFactories;
         this.fieldCount = fieldCount;
-        this.file = file;
     }
 
-    public synchronized void create() throws HyracksDataException {
-        if (isActivated) {
-            throw new HyracksDataException("Failed to create the index since it is activated.");
-        }
-
-        boolean fileIsMapped = false;
-        synchronized (fileMapProvider) {
-            fileIsMapped = fileMapProvider.isMapped(file);
-            if (!fileIsMapped) {
-                bufferCache.createFile(file);
-            }
-            fileId = fileMapProvider.lookupFileId(file);
-            try {
-                // Also creates the file if it doesn't exist yet.
-                bufferCache.openFile(fileId);
-            } catch (HyracksDataException e) {
-                // Revert state of buffer cache since file failed to open.
-                if (!fileIsMapped) {
-                    bufferCache.deleteFile(fileId, false);
-                }
-                throw e;
-            }
-        }
-
-        freePageManager.open(fileId);
-        initEmptyTree();
-        freePageManager.close();
-        bufferCache.closeFile(fileId);
-    }
-
-    private void initEmptyTree() throws HyracksDataException {
+    void initEmptyTree() throws HyracksDataException {
         ITreeIndexFrame frame = leafFrameFactory.createFrame();
         ITreeIndexMetaDataFrame metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
         freePageManager.init(metaFrame, rootPage);
@@ -120,68 +81,6 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         }
     }
 
-    public synchronized void activate() throws HyracksDataException {
-        if (isActivated) {
-            throw new HyracksDataException("Failed to activate the index since it is already activated.");
-        }
-
-        boolean fileIsMapped = false;
-        synchronized (fileMapProvider) {
-            fileIsMapped = fileMapProvider.isMapped(file);
-            if (!fileIsMapped) {
-                bufferCache.createFile(file);
-            }
-            fileId = fileMapProvider.lookupFileId(file);
-            try {
-                // Also creates the file if it doesn't exist yet.
-                bufferCache.openFile(fileId);
-            } catch (HyracksDataException e) {
-                // Revert state of buffer cache since file failed to open.
-                if (!fileIsMapped) {
-                    bufferCache.deleteFile(fileId, false);
-                }
-                throw e;
-            }
-        }
-        freePageManager.open(fileId);
-
-        // TODO: Should probably have some way to check that the tree is physically consistent
-        // or that the file we just opened actually is a tree
-
-        isActivated = true;
-    }
-
-    public synchronized void deactivate() throws HyracksDataException {
-        if (!isActivated) {
-            throw new HyracksDataException("Failed to deactivate the index since it is already deactivated.");
-        }
-
-        bufferCache.closeFile(fileId);
-        freePageManager.close();
-
-        isActivated = false;
-    }
-
-    public synchronized void destroy() throws HyracksDataException {
-        if (isActivated) {
-            throw new HyracksDataException("Failed to destroy the index since it is activated.");
-        }
-
-        if (fileId == -1) {
-            return;
-        }
-        bufferCache.deleteFile(fileId, false);
-        file.delete();
-        fileId = -1;
-    }
-
-    public synchronized void clear() throws HyracksDataException {
-        if (!isActivated) {
-            throw new HyracksDataException("Failed to clear the index since it is not activated.");
-        }
-        initEmptyTree();
-    }
-
     public boolean isEmptyTree(ITreeIndexFrame frame) throws HyracksDataException {
         ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
         rootNode.acquireReadLatch();
@@ -210,42 +109,68 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         }
     }
 
-    public int getFileId() {
-        return fileId;
-    }
-
-    public FileReference getFileReference() {
-        return file;
-    }
-
-    public IBufferCache getBufferCache() {
-        return bufferCache;
-    }
-
+    @Override
     public ITreeIndexFrameFactory getInteriorFrameFactory() {
         return interiorFrameFactory;
     }
 
+    @Override
     public ITreeIndexFrameFactory getLeafFrameFactory() {
         return leafFrameFactory;
     }
 
+    @Override
     public IBinaryComparatorFactory[] getComparatorFactories() {
         return cmpFactories;
     }
 
+    @Override
     public IFreePageManager getFreePageManager() {
         return freePageManager;
     }
 
+    @Override
     public int getRootPageId() {
         return rootPage;
     }
 
+    @Override
     public int getFieldCount() {
         return fieldCount;
     }
 
+    /**
+     * Creates the index: after {@code super.create()}, opens the free page manager on the
+     * file, initializes an empty tree via {@code initEmptyTree()}, then closes the free
+     * page manager and the file in the buffer cache.
+     * NOTE(review): the trailing closeFile presumably balances an open performed by
+     * super.create(), which is not visible here - confirm.
+     */
+    @Override
+    public synchronized void create() throws HyracksDataException {
+        super.create();
+
+        freePageManager.open(fileId);
+        initEmptyTree();
+        freePageManager.close();
+        bufferCache.closeFile(fileId);
+    }
+
+    /**
+     * Clears the index by re-initializing it as an empty tree, once {@code super.clear()}
+     * has returned without throwing.
+     */
+    @Override
+    public synchronized void clear() throws HyracksDataException {
+        super.clear();
+
+        initEmptyTree();
+    }
+
+    /**
+     * Deactivates the index: delegates to {@code super.deactivate()} first and then
+     * closes the free page manager.
+     */
+    @Override
+    public synchronized void deactivate() throws HyracksDataException {
+        super.deactivate();
+        freePageManager.close();
+    }
+
+    /**
+     * Activates the index: delegates to {@code super.activate()} and then opens the free
+     * page manager on the file id.
+     * NOTE(review): assumes super.activate() has assigned fileId by the time it returns;
+     * that method is not visible here - confirm.
+     */
+    @Override
+    public synchronized void activate() throws HyracksDataException {
+        super.activate();
+        freePageManager.open(fileId);
+
+        // TODO: Should probably have some way to check that the tree is physically consistent
+        // or that the file we just opened actually is a tree
+    }
+
     public abstract class AbstractTreeIndexBulkLoader implements IIndexBulkLoader {
         protected final MultiComparator cmp;
         protected final int slotSize;
@@ -281,16 +206,17 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
 
             interiorFrame.setPage(leafFrontier.page);
             interiorFrame.initBuffer((byte) 0);
-            interiorMaxBytes = (int) ((float) interiorFrame.getBuffer().capacity() * fillFactor);
+            interiorMaxBytes = (int) (interiorFrame.getBuffer().capacity() * fillFactor);
 
             leafFrame.setPage(leafFrontier.page);
             leafFrame.initBuffer((byte) 0);
-            leafMaxBytes = (int) ((float) leafFrame.getBuffer().capacity() * fillFactor);
+            leafMaxBytes = (int) (leafFrame.getBuffer().capacity() * fillFactor);
             slotSize = leafFrame.getSlotSize();
 
             nodeFrontiers.add(leafFrontier);
         }
 
+        @Override
         public abstract void add(ITupleReference tuple) throws IndexException, HyracksDataException;
 
         protected void handleException() throws HyracksDataException {
@@ -383,7 +309,7 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
     public IBinaryComparatorFactory[] getCmpFactories() {
         return cmpFactories;
     }
-    
+
     @Override
     public boolean hasMemoryComponents() {
         return true;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/StatisticsFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/StatisticsFactory.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/StatisticsFactory.java
new file mode 100644
index 0000000..2e3e127
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/StatisticsFactory.java
@@ -0,0 +1,31 @@
+package org.apache.hyracks.storage.am.common.statistics;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProviderFactory;
+import org.apache.hyracks.storage.am.common.statistics.wavelet.WaveletSynopsis;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.file.IFileMapProvider;
+
+/**
+ * Factory that builds statistics synopses which all share one buffer cache, file map
+ * provider, set of key fields and value provider factory.
+ */
+public class StatisticsFactory {
+
+    // Threshold argument handed to every WaveletSynopsis created by this factory.
+    private static final int WAVELET_THRESHOLD = 10;
+
+    private final IBufferCache bufferCache;
+    private final IFileMapProvider fileMapProvider;
+    private final int[] statisticsKeyFields;
+    private final IPrimitiveIntegerValueProviderFactory valueProviderFactory;
+
+    /**
+     * Stores the collaborators that are passed on to each synopsis created later.
+     */
+    public StatisticsFactory(IBufferCache bufferCache, IFileMapProvider fileMapProvider, int[] statisticsKeyFields,
+            IPrimitiveIntegerValueProviderFactory valueProviderFactory) {
+        this.statisticsKeyFields = statisticsKeyFields;
+        this.valueProviderFactory = valueProviderFactory;
+        this.fileMapProvider = fileMapProvider;
+        this.bufferCache = bufferCache;
+    }
+
+    /**
+     * Builds a new wavelet synopsis on top of the given file.
+     *
+     * @param file the file handed to the synopsis
+     * @return a freshly constructed {@link WaveletSynopsis}
+     * @throws HyracksDataException declared for the synopsis construction
+     */
+    public WaveletSynopsis createWaveletStatistics(FileReference file) throws HyracksDataException {
+        final WaveletSynopsis synopsis = new WaveletSynopsis(bufferCache, fileMapProvider, file, statisticsKeyFields,
+                WAVELET_THRESHOLD, valueProviderFactory);
+        return synopsis;
+    }
+
+    /**
+     * @return the key fields array given at construction (the same array, not a copy)
+     */
+    public int[] getStatisticsFields() {
+        return statisticsKeyFields;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/Synopsis.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/Synopsis.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/Synopsis.java
new file mode 100644
index 0000000..7b14822
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/Synopsis.java
@@ -0,0 +1,22 @@
+package org.apache.hyracks.storage.am.common.statistics;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.storage.am.common.api.IIndexBulkLoader;
+import org.apache.hyracks.storage.am.common.impls.AbstractFileManager;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.file.IFileMapProvider;
+
+/**
+ * Base class for file-backed synopses; file handling is inherited from
+ * {@link AbstractFileManager}.
+ *
+ * @param <K> type of the keys passed to {@link #addElement}
+ * @param <V> type of the values passed to {@link #addElement}
+ */
+public abstract class Synopsis<K, V> extends AbstractFileManager {
+
+    /**
+     * Forwards all arguments unchanged to the {@link AbstractFileManager} constructor.
+     */
+    public Synopsis(IBufferCache bufferCache, IFileMapProvider fileMapProvider, FileReference file) {
+        super(bufferCache, fileMapProvider, file);
+    }
+
+    /**
+     * Adds one key/value element to the synopsis; the meaning of the pair is defined by
+     * the concrete subclass.
+     */
+    public abstract void addElement(K key, V value);
+
+    /**
+     * @return a bulk loader for this synopsis
+     * @throws HyracksDataException declared for implementations
+     */
+    public abstract IIndexBulkLoader createBuilder() throws HyracksDataException;
+
+    /**
+     * @return the number of pages reported by the implementation
+     * @throws HyracksDataException declared for implementations
+     */
+    public abstract int getNumPages() throws HyracksDataException;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/SynopsisType.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/SynopsisType.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/SynopsisType.java
new file mode 100644
index 0000000..6fda029
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/SynopsisType.java
@@ -0,0 +1,7 @@
+package org.apache.hyracks.storage.am.common.statistics;
+
+/**
+ * Kinds of synopsis.
+ * NOTE(review): presumably None means no statistics are collected, while Wavelet and
+ * Sketch correspond to the statistics.wavelet and statistics.sketch packages - confirm
+ * against the code that consumes this enum.
+ */
+public enum SynopsisType {
+    None,
+    Wavelet,
+    Sketch
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/GroupCountSketch.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/GroupCountSketch.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/GroupCountSketch.java
new file mode 100644
index 0000000..c0d9d5c
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/GroupCountSketch.java
@@ -0,0 +1,90 @@
+package org.apache.hyracks.storage.am.common.statistics.sketch;
+
+/**
+ * Sketch backed by a four-dimensional array of counters indexed by
+ * [level][row][bucket][sub-bucket]. Each row owns eight hash seeds that select the
+ * bucket, the sub-bucket and the sign applied to an update.
+ */
+public class GroupCountSketch extends Sketch {
+
+    // Seeds per row: [0,1] bucket hash, [2,3] sub-bucket hash, [4..7] sign hash.
+    private static final int SEEDS_PER_ROW = 8;
+
+    private final int levels;
+    private final int depth;
+    private final int buckets;
+    private final int subbuckets;
+    private final int fanoutLog;
+    private final double[][][][] counters;
+    private final long[][] hashSeeds;
+
+    /**
+     * @param levels    number of levels kept (first counter dimension)
+     * @param depth     number of rows, each with its own seeds (second dimension)
+     * @param width     number of buckets per row; there are {@code width * width} sub-buckets
+     * @param fanoutLog number of bits an item is shifted right between consecutive levels
+     */
+    public GroupCountSketch(int levels, int depth, int width, int fanoutLog) {
+        this.levels = levels;
+        this.depth = depth;
+        this.buckets = width;
+        this.fanoutLog = fanoutLog;
+        this.subbuckets = width * width;
+
+        this.counters = new double[this.levels][this.depth][this.buckets][this.subbuckets];
+        this.hashSeeds = new long[this.depth][SEEDS_PER_ROW];
+        initSeeds(this.depth, SEEDS_PER_ROW, this.hashSeeds);
+    }
+
+    /**
+     * Applies {@code diff} for {@code item} in every row and on every level. The sign and
+     * the sub-bucket are derived from the item itself; the bucket is derived from the
+     * item shifted right by {@code fanoutLog} bits once per level.
+     */
+    public void update(long item, double diff) {
+        for (int row = 0; row < depth; row++) {
+            final long[] seeds = hashSeeds[row];
+            final int signHash = HashGenerator.fourwise(seeds[4], seeds[5], seeds[6], seeds[7], item);
+            final int sub = HashGenerator.hash31(seeds[2], seeds[3], item) % subbuckets;
+            final boolean add = (signHash & 1) == 1;
+
+            long group = item;
+            for (int level = 0; level < levels; level++) {
+                final int bucket = HashGenerator.hash31(seeds[0], seeds[1], group) % buckets;
+                if (add) {
+                    counters[level][row][bucket][sub] += diff;
+                } else {
+                    counters[level][row][bucket][sub] -= diff;
+                }
+                group >>= fanoutLog;
+            }
+        }
+    }
+
+    /**
+     * Returns the median over all rows of the signed counter selected by {@code group}
+     * on the given level.
+     */
+    public double count(int group, int level) {
+        final double[] estimates = new double[depth];
+
+        for (int row = 0; row < depth; row++) {
+            final long[] seeds = hashSeeds[row];
+            final int bucket = HashGenerator.hash31(seeds[0], seeds[1], group) % buckets;
+            final int sub = HashGenerator.hash31(seeds[2], seeds[3], group) % subbuckets;
+            final int signHash = HashGenerator.fourwise(seeds[4], seeds[5], seeds[6], seeds[7], group);
+
+            if ((signHash & 1) == 1) {
+                estimates[row] += counters[level][row][bucket][sub];
+            } else {
+                estimates[row] -= counters[level][row][bucket][sub];
+            }
+        }
+
+        return getMedian(estimates, depth);
+    }
+
+    /**
+     * Estimates the F2 moment (sum of squares): for each row, sums the squared
+     * sub-bucket counters of the bucket selected by {@code group} on the given level,
+     * then returns the median of the per-row sums.
+     */
+    public double energyEst(int group, int level) {
+        final double[] estimates = new double[depth];
+
+        for (int row = 0; row < depth; row++) {
+            final int bucket = HashGenerator.hash31(hashSeeds[row][0], hashSeeds[row][1], group) % buckets;
+            double sumOfSquares = 0;
+            for (int sub = 0; sub < subbuckets; sub++) {
+                sumOfSquares += Math.pow(counters[level][row][bucket][sub], 2.0);
+            }
+            estimates[row] = sumOfSquares;
+        }
+
+        return getMedian(estimates, depth);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/HashGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/HashGenerator.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/HashGenerator.java
new file mode 100644
index 0000000..3ace6a0
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/HashGenerator.java
@@ -0,0 +1,33 @@
+package org.apache.hyracks.storage.am.common.statistics.sketch;
+
+/**
+ * Static hash helpers used by the sketch-based statistics.
+ */
+public class HashGenerator {
+
+    // 2^31 - 1 = 2147483647, applied as a bit mask that keeps the low 31 bits.
+    private static final int MOD = 2147483647;
+    // Shift width that goes with MOD.
+    private static final int HL = 31;
+
+    /**
+     * Hashes {@code x} with the coefficients {@code a} and {@code b}: computes
+     * {@code a * x + b} in 64-bit arithmetic, folds the bits above position 31 back onto
+     * the low bits and masks the sum to 31 bits.
+     *
+     * @return a value in the range {@code [0, 2^31 - 1]}
+     */
+    public static int hash31(long a, long b, long x) {
+        // may need to do another mod afterwards, or drop high bits
+        // depending on d, number of bad guys
+        long result = (a * x) + b;
+        result = ((result >> HL) + result) & MOD;
+
+        return (int) result;
+    }
+
+    /**
+     * Chains three {@link #hash31} calls; per the original note this is meant to return
+     * values that are 4-wise independent by repeated calls to the pairwise independent
+     * routine.
+     *
+     * @return {@code hash31(hash31(hash31(x, a, b), x, c), x, d)}
+     */
+    public static int fourwise(long a, long b, long c, long d, long x) {
+        return hash31(hash31(hash31(x, a, b), x, c), x, d);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/QuickSelect.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/QuickSelect.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/QuickSelect.java
new file mode 100644
index 0000000..5a05915
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/QuickSelect.java
@@ -0,0 +1,47 @@
+package org.apache.hyracks.storage.am.common.statistics.sketch;
+
+import java.util.Random;
+
+/**
+ * Randomized quickselect over {@code double} arrays.
+ */
+public class QuickSelect {
+
+    // Shared pivot source; never reassigned, hence final.
+    private static final Random rand = new Random();
+
+    /**
+     * Partitions {@code arr[left..right]} around the value at {@code pivot}: elements
+     * strictly smaller than the pivot value end up before the returned index and the
+     * pivot value is stored at the returned index.
+     */
+    private static int partition(double[] arr, int left, int right, int pivot) {
+        final double pivotVal = arr[pivot];
+        // Park the pivot at the right edge while the rest of the range is scanned.
+        swap(arr, pivot, right);
+        int boundary = left;
+        for (int i = left; i < right; i++) {
+            if (arr[i] < pivotVal) {
+                swap(arr, i, boundary);
+                boundary++;
+            }
+        }
+        swap(arr, right, boundary);
+        return boundary;
+    }
+
+    /**
+     * Returns the element that would sit at index {@code n} if {@code arr} were sorted
+     * in ascending order. The array is reordered in place.
+     *
+     * @param arr the values to select from; modified by this call
+     * @param n   zero-based rank of the wanted element
+     * @return the selected element, or {@code 0} when {@code arr} is empty or {@code n}
+     *         is not a valid index
+     */
+    public static double select(double[] arr, int n) {
+        int lo = 0;
+        int hi = arr.length - 1;
+        while (lo <= hi) {
+            final int pivot = rand.nextInt(hi - lo + 1) + lo;
+            final int settled = partition(arr, lo, hi, pivot);
+            if (settled == n) {
+                return arr[settled];
+            }
+            if (settled < n) {
+                lo = settled + 1;
+            } else {
+                hi = settled - 1;
+            }
+        }
+        // Rank outside the array (or empty array): the search range ran out.
+        return 0;
+    }
+
+    /** Exchanges two array slots; a no-op when both indices are equal. */
+    private static void swap(double[] arr, int i1, int i2) {
+        if (i1 == i2) {
+            return;
+        }
+        final double held = arr[i1];
+        arr[i1] = arr[i2];
+        arr[i2] = held;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/Sketch.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/Sketch.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/Sketch.java
new file mode 100644
index 0000000..aff50ac
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/Sketch.java
@@ -0,0 +1,24 @@
+package org.apache.hyracks.storage.am.common.statistics.sketch;
+
+import java.util.Random;
+
+/**
+ * Base class for sketches; provides hash-seed initialization and a median helper.
+ */
+public abstract class Sketch {
+
+    /**
+     * Fills the first {@code k} rows and {@code m} columns of {@code hashSeeds} with
+     * random non-negative values.
+     *
+     * @param k         number of rows to fill
+     * @param m         number of seeds to fill per row
+     * @param hashSeeds destination array; must be at least {@code k x m}
+     */
+    protected void initSeeds(int k, int m, long[][] hashSeeds) {
+        final Random prng = new Random();
+
+        for (int i = 0; i < k; i++) {
+            for (int j = 0; j < m; j++) {
+                // Clear the sign bit rather than calling Math.abs: Math.abs(Long.MIN_VALUE)
+                // is still negative, so abs could let a negative seed through.
+                hashSeeds[i][j] = prng.nextLong() & Long.MAX_VALUE;
+            }
+        }
+    }
+
+    /**
+     * Returns the element of rank {@code data.length / 2} of {@code data}, as selected by
+     * {@code QuickSelect.select}.
+     *
+     * @param data   the values; passed straight to {@code QuickSelect.select}
+     * @param length currently ignored, the whole array is always used
+     */
+    protected static double getMedian(double[] data, int length) {
+        return QuickSelect.select(data, data.length / 2);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/SketchSynopsis.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/SketchSynopsis.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/SketchSynopsis.java
new file mode 100644
index 0000000..586943a
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/sketch/SketchSynopsis.java
@@ -0,0 +1,83 @@
+package org.apache.hyracks.storage.am.common.statistics.sketch;
+
+//public class SketchSynopsis extends Synopsis implements IIndexBulkLoader {
+//
+//    private final int levelNum;
+//    private final int fanoutLog;
+//    private final double epsilon;
+//    private final GroupCountSketch gcSketch;
+//    private final IBufferCache bufferCache;
+//    private final IFileMapProvider fileMapProvider;
+//    private final FileReference file;
+//    private final int[] keyFields;
+//    private final int fileId = -1;
+//    private final boolean isActivated = false;
+//
+//    public SketchSynopsis(IBufferCache bufferCache, IFileMapProvider fileMapProvider, FileReference file,
+//            int[] keyFields, int domainSize, double delta, double epsilon, int fanOut) {
+//        super(bufferCache, fileMapProvider, file);
+//        this.keyFields = keyFields;
+//        this.fanoutLog = (int) (Math.log(fanOut) / Math.log(2.0));
+//        this.levelNum = domainSize / fanoutLog;
+//        this.epsilon = epsilon;
+//        final int depth = (int) Math.ceil(Math.log(1 / delta));
+//        final int width = (int) Math.ceil(1 / epsilon);
+//        gcSketch = new GroupCountSketch(this.levelNum + 1, depth, width, fanoutLog);
+//    }
+//
+//    public void update(long item, double diff) {
+//        //translate position to coefficient
+//        item += 1 << (levelNum * fanoutLog);
+//        //transform update into wavelet domain
+//        long div = 1;
+//        for (int i = 0; i < levelNum; i++) {
+//            //            Long coeffIdx = (long) ((1 << ((levelNum - i) * fanoutLog)) + item);
+//            item >>= (fanoutLog - 1);
+//            int sign = (item & 1) == 0 ? 1 : -1;
+//            item >>= 1;
+//            double normCoeff = WaveletCoefficient.getNormalizationCoefficient(levelNum * fanoutLog,
+//                    (i + 1) * fanoutLog);
+//            div = (1 << ((i + 1) * fanoutLog));
+//
+//            gcSketch.update(item, diff * sign / (normCoeff * div));
+//        }
+//        gcSketch.update(0, diff / div);
+//    }
+//
+//    @Override
+//    public void add(ITupleReference tuple) throws IndexException, HyracksDataException {
+//        // TODO Auto-generated method stub
+//
+//    }
+//
+//    @Override
+//    public void end() throws IndexException, HyracksDataException {
+//        // TODO Auto-generated method stub
+//
+//    }
+//
+//    @Override
+//    public void create() {
+//        // TODO Auto-generated method stub
+//
+//    }
+//
+//    @Override
+//    public void activate() {
+//        // TODO Auto-generated method stub
+//
+//    }
+//
+//    @Override
+//    public IIndexBulkLoader createBuilder(long numElements) {
+//        // TODO Auto-generated method stub
+//        return null;
+//    }
+//
+//    @Override
+//    public void addElement(Object key, Object value) {
+//        // TODO Auto-generated method stub
+//
+//    }
+//
+//}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletCoefficient.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletCoefficient.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletCoefficient.java
new file mode 100644
index 0000000..5d9615b
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletCoefficient.java
@@ -0,0 +1,83 @@
+package org.apache.hyracks.storage.am.common.statistics.wavelet;
+
+import java.util.Map.Entry;
+import java.util.Objects;
+
+import org.apache.hyracks.data.std.api.INumeric;
+
+/**
+ * A wavelet coefficient: a value together with its level and index. It doubles as a map
+ * entry whose key is the index and whose value is the coefficient value.
+ *
+ * <p>{@link #compareTo} orders by value only, so the natural ordering is not consistent
+ * with {@link #equals}, which also compares level and index.
+ */
+public class WaveletCoefficient<K/*extends IMath<K>*/, V extends Comparable<V>/* extends IMath<V>*/>
+        implements Entry<K, V>, Comparable<WaveletCoefficient<K, V>> {
+
+    public V value;
+    public int level;
+    public K index;
+
+    /** Creates a coefficient whose value and index are {@code null} and whose level is 0. */
+    public WaveletCoefficient() {
+    }
+
+    public WaveletCoefficient(V value, int level, K index) {
+        this.value = value;
+        this.level = level;
+        this.index = index;
+    }
+
+    /** @return the coefficient index, which serves as the entry key */
+    @Override
+    public K getKey() {
+        return index;
+    }
+
+    @Override
+    public V getValue() {
+        return value;
+    }
+
+    /**
+     * Replaces the coefficient value.
+     *
+     * @return the value held before the call, as the {@code Map.Entry} contract requires
+     */
+    @Override
+    public V setValue(V value) {
+        final V previous = this.value;
+        this.value = value;
+        return previous;
+    }
+
+    public void setLevel(int level) {
+        this.level = level;
+    }
+
+    public void setIndex(K index) {
+        this.index = index;
+    }
+
+    /**
+     * Two coefficients are equal when value, level and index are all equal. Null values
+     * and indexes (possible after the no-arg constructor) are compared null-safely.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof WaveletCoefficient)) {
+            return false;
+        }
+        final WaveletCoefficient<?, ?> other = (WaveletCoefficient<?, ?>) o;
+        return level == other.level && Objects.equals(value, other.value) && Objects.equals(index, other.index);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(value, level, index);
+    }
+
+    /**
+     * Computes {@code 2^((maxLevel - level) / 2)}, multiplied by {@code sqrt(2)} when the
+     * level distance is odd.
+     */
+    public static Double getNormalizationCoefficient(int maxLevel, int level) {
+        final int distance = maxLevel - level;
+        // Shift a long: an int shift turns negative at distance 62 and wraps from 64 on.
+        final double powerOfTwo = 1L << (distance / 2);
+        return ((distance % 2) == 0) ? powerOfTwo : powerOfTwo * Math.sqrt(2);
+    }
+
+    /**
+     * Derives the level of a coefficient from its index: index 0 maps to
+     * {@code maxLevel}; otherwise the result is {@code maxLevel} minus the position of
+     * the highest set bit of the index.
+     */
+    public static <K extends INumeric> int getLevel(K coeffPointable, int maxLevel) {
+        long coeffIdx = coeffPointable.longValue();
+        if (coeffIdx == 0)
+            return maxLevel;
+        int level = -1;
+        while (coeffIdx > 0) {
+            coeffIdx = coeffIdx >> 1;
+            level++;
+        }
+        return maxLevel - level;
+    }
+
+    @Override
+    // default comparator based on coefficient value
+    public int compareTo(WaveletCoefficient<K, V> o) {
+        return value.compareTo(o.getValue());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletSynopsis.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletSynopsis.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletSynopsis.java
new file mode 100644
index 0000000..cae952d
--- /dev/null
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/statistics/wavelet/WaveletSynopsis.java
@@ -0,0 +1,263 @@
+package org.apache.hyracks.storage.am.common.statistics.wavelet;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.PriorityQueue;
+import java.util.Stack;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.common.api.IIndexBulkLoader;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProvider;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProviderFactory;
+import org.apache.hyracks.storage.am.common.api.IndexException;
+import org.apache.hyracks.storage.am.common.statistics.Synopsis;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.buffercache.ICachedPage;
+import org.apache.hyracks.storage.common.file.BufferedFileHandle;
+import org.apache.hyracks.storage.common.file.IFileMapProvider;
+
+public class WaveletSynopsis/*<K extends IMath<K>, V extends IMath<V>>*/ extends Synopsis<Integer, Double> {
+
+    // id of the page that holds the synopsis metadata
+    private final static int METADATA_PAGE_ID = 0;
+    // offset of the int page count inside the metadata page
+    private final static int NUM_PAGES_OFFSET = 0;
+    // offset of the long element count, placed right after the 4-byte page count
+    private final static int NUM_ELEMENTS_OFFSET = NUM_PAGES_OFFSET + 4;
+
+    // tuple field indexes handed to the constructor; the builder only reads waveletFields[0]
+    private final int[] waveletFields;
+    // extracts the integer key (and its maximum level) from the raw field bytes
+    private final IPrimitiveIntegerValueProvider waveletFieldValueProvider;
+    // retained coefficients, ordered by WaveletCoefficient's natural ordering
+    private final PriorityQueue<WaveletCoefficient<Integer, Double>> coefficients;
+    // maximum number of coefficients kept in the queue
+    private final long threshold;
+
+    // number of data pages, derived from threshold and the buffer cache page size
+    private final int numPages;
+
+    public WaveletSynopsis(IBufferCache bufferCache, IFileMapProvider fileMapProvider, FileReference file,
+            int[] keyFields, int threshold, IPrimitiveIntegerValueProviderFactory valueProviderFactory) {
+        super(bufferCache, fileMapProvider, file);
+        this.waveletFields = keyFields;
+        this.waveletFieldValueProvider = valueProviderFactory.createPrimitiveIntegerValueProvider();
+        this.threshold = threshold;
+        this.coefficients = new PriorityQueue<>(threshold);
+        this.numPages = (int) Math.ceil(threshold * (4 + 8) / (double) bufferCache.getPageSize());
+    }
+
+    /**
+     * Returns the number of data pages reserved for the persisted coefficients.
+     *
+     * @return the page count computed in the constructor
+     * @throws HyracksDataException
+     *             if the synopsis has not been activated
+     */
+    @Override
+    public int getNumPages() throws HyracksDataException {
+        if (!isActivated) {
+            // the message used to name the bloom filter this check was copied from
+            throw new HyracksDataException("The wavelet synopsis is not activated.");
+        }
+        return numPages;
+    }
+
+    /**
+     * Writes zeroed metadata (page count 0, element count 0) into the metadata page.
+     * The page is pinned and write-latched for the update; the latch is released and the page
+     * unpinned in the finally block.
+     *
+     * @throws HyracksDataException
+     *             propagated from the buffer cache
+     */
+    private void initWaveletSynopsisMetaData() throws HyracksDataException {
+        // NOTE(review): the 'true' argument presumably requests a new page - confirm against IBufferCache.pin
+        ICachedPage metaPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, METADATA_PAGE_ID), true);
+        metaPage.acquireWriteLatch();
+        try {
+            metaPage.getBuffer().putInt(NUM_PAGES_OFFSET, 0);
+            metaPage.getBuffer().putLong(NUM_ELEMENTS_OFFSET, 0L);
+        } finally {
+            metaPage.releaseWriteLatch(true);
+            bufferCache.unpin(metaPage);
+        }
+    }
+
+    @Override
+    // Adds a new coefficient to the transform, subject to thresholding
+    public void addElement(Integer index, Double value) {
+        WaveletCoefficient<Integer, Double> newCoeff;
+        if (coefficients.size() < threshold)
+            newCoeff = new WaveletCoefficient<Integer, Double>(value, 0, index);
+        else {
+            newCoeff = coefficients.poll();
+            newCoeff.setValue(value);
+            newCoeff.setIndex(index);
+        }
+        coefficients.add(newCoeff);
+
+    }
+
+    /**
+     * Creates the bulk loader that builds this synopsis.
+     *
+     * @return a new {@code SparseTransformBuilder}
+     * @throws HyracksDataException
+     *             if the builder fails while persisting the metadata in its constructor
+     */
+    @Override
+    public IIndexBulkLoader createBuilder() throws HyracksDataException {
+        return new SparseTransformBuilder();
+    }
+
+    public class SparseTransformBuilder implements IIndexBulkLoader {
+        // coefficients waiting to be merged with an entry of the same level (see average())
+        private final Stack<WaveletCoefficient<Integer, Double>> avgStack;
+        // key of the most recently added tuple, paired with the constant 1.0
+        private Pair<Integer, Double> curr;
+        // position in the key domain up to which the transform has been computed
+        private long transformPos;
+        // upper end of the key domain
+        private final long domainEnd;
+        // level of the coefficient pushed most recently onto avgStack
+        private int lastLevel;
+        // (offset, value) pairs handed to computeDyadicRange for the range being processed
+        private final List<Pair<Integer, Double>> borderTuples;
+
+        /**
+         * Sets up an empty transform and writes the synopsis metadata page.
+         * NOTE(review): the key domain is fixed to the range of a byte here although keys are
+         * handled as integers elsewhere - presumably a placeholder, confirm.
+         *
+         * @throws HyracksDataException
+         *             if the metadata page cannot be written
+         */
+        public SparseTransformBuilder() throws HyracksDataException {
+            // domain boundaries; the transform starts at the lowest domain value
+            transformPos = Byte.MIN_VALUE;
+            domainEnd = Byte.MAX_VALUE;
+
+            // working state of the transform
+            lastLevel = 0;
+            borderTuples = new ArrayList<>(2);
+            avgStack = new Stack<>();
+
+            persistWaveletSynopsisMetaData();
+        }
+
+        /**
+         * Writes the page count and the coefficient threshold into the metadata page.
+         * The page is pinned and write-latched for the update; the latch is released and the page
+         * unpinned in the finally block.
+         *
+         * @throws HyracksDataException
+         *             propagated from the buffer cache
+         */
+        private void persistWaveletSynopsisMetaData() throws HyracksDataException {
+            ICachedPage metaPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, METADATA_PAGE_ID), false);
+            metaPage.acquireWriteLatch();
+            try {
+                metaPage.getBuffer().putInt(NUM_PAGES_OFFSET, numPages);
+                // the element count written here is the threshold, not the number of coefficients actually held
+                metaPage.getBuffer().putLong(NUM_ELEMENTS_OFFSET, threshold);
+            } finally {
+                metaPage.releaseWriteLatch(true);
+                bufferCache.unpin(metaPage);
+            }
+        }
+
+        /**
+         * Feeds one tuple into the sparse transform. Only the first wavelet field
+         * ({@code waveletFields[0]}) is read; its integer key is stored in {@code curr} together
+         * with the constant 1.0. While {@code transformPos} has not passed the key, coefficients for
+         * the next range are computed by {@code computeDyadicRange}, entries of equal level on
+         * {@code avgStack} are merged by {@code average}, and {@code transformPos} is advanced.
+         * NOTE(review): presumably tuples are expected in non-decreasing key order - confirm with the caller.
+         *
+         * @param tuple
+         *            the tuple whose first wavelet field is added
+         */
+        @Override
+        public void add(ITupleReference tuple) throws IndexException, HyracksDataException {
+            //            if (waveletFields.length > 1)
+            //                throw new HyracksDataException("Wavelet synopsis does not support composite keys");
+            curr = Pair.of((int) waveletFieldValueProvider.getValue(tuple.getFieldData(waveletFields[0]),
+                    tuple.getFieldStart(waveletFields[0])), 1.0);
+            final int maxLevel = waveletFieldValueProvider.maxLevel(tuple.getFieldData(waveletFields[0]),
+                    tuple.getFieldStart(waveletFields[0]));
+
+            while (transformPos <= curr.getKey()) {
+                // current position is a left border of dyadic range
+                // the tuple is recorded at offset 0 and kept in borderTuples for a later call
+                if (curr.getKey() == transformPos) {
+                    borderTuples.add(Pair.of(0, (double) curr.getValue()));
+                    return;
+                }
+                // floor(log2(distance from transformPos to the key, inclusive))
+                int newLevel = (int) Math.floor(Math.log((double) (curr.getKey()) - transformPos + 1) / Math.log(2));
+                long levelRightBorder = 1l << newLevel;
+                //add first dummy average
+                if (avgStack.isEmpty()) {
+                    avgStack.push(new WaveletCoefficient<Integer, Double>(0.0, maxLevel, 0));
+                    lastLevel = newLevel;
+                }
+
+                // current position is a right border of dyadic range
+                if (curr.getKey() == transformPos + levelRightBorder - 1 /*&& curr.position != domainMax*/)
+                    borderTuples.add(Pair.of((int) (levelRightBorder - 1), (double) curr.getValue()));
+
+                WaveletCoefficient<Integer, Double> newCoeff;
+                WaveletCoefficient<Integer, Double> topCoeff = avgStack.peek();
+                if (newLevel >= lastLevel) {
+                    topCoeff = computeDyadicRange(lastLevel, maxLevel, topCoeff, borderTuples);
+                    newCoeff = topCoeff;
+                    // merge with stack entries for as long as the top of the stack has the same level
+                    do {
+                        WaveletCoefficient<Integer, Double> oldCoeff = avgStack.pop();
+                        //skip first dummy coefficient
+                        if (oldCoeff.index > 0)
+                            newCoeff = average(oldCoeff, newCoeff, waveletFieldValueProvider.maxLevel(
+                                    tuple.getFieldData(waveletFields[0]), tuple.getFieldStart(waveletFields[0])));
+                    } while (!avgStack.isEmpty() && avgStack.peek().level == newCoeff.level);
+                } else {
+                    newCoeff = computeDyadicRange(newLevel, maxLevel, topCoeff, borderTuples);
+                    topCoeff = newCoeff;
+                }
+                avgStack.push(newCoeff);
+                // advance by the width of the range that was just computed
+                transformPos += 1l << topCoeff.level;
+                lastLevel = newCoeff.level;
+
+                borderTuples.clear();
+            }
+        }
+
+        private WaveletCoefficient<Integer, Double> average(WaveletCoefficient<Integer, Double> oldCoeff,
+                WaveletCoefficient<Integer, Double> newCoeff, int maxLevel) {
+            Integer coeffIdx = oldCoeff.index >> 1;
+            addElement(coeffIdx, (oldCoeff.value - newCoeff.value)
+                    / (2.0 * WaveletCoefficient.getNormalizationCoefficient(maxLevel, oldCoeff.level + 1)));
+            WaveletCoefficient<Integer, Double> topCoeff = new WaveletCoefficient<Integer, Double>(
+                    (oldCoeff.value + newCoeff.value) / 2.0, oldCoeff.level + 1, coeffIdx);
+            return topCoeff;
+        }
+
+        /**
+         * Computes the coefficients of one range of the given level from the tuples recorded in
+         * {@code borderTuples}. For every level from 1 up to {@code level} each recorded value is
+         * halved, accumulated per coefficient index (subtracted when the tuple offset is odd, added
+         * when it is even) and the normalized results are handed to {@code addElement}.
+         * The entries of {@code borderTuples} are overwritten in place with the halved values and
+         * shifted offsets.
+         *
+         * @param level
+         *            the level of the range to compute
+         * @param maxLevel
+         *            the highest level of the transform, used for normalization
+         * @param topCoeff
+         *            coefficient whose index and level are used to derive the new indexes
+         * @param borderTuples
+         *            (offset, value) pairs inside the range; modified by this method
+         * @return the average coefficient of the range at {@code level}
+         */
+        private WaveletCoefficient<Integer, Double> computeDyadicRange(int level, int maxLevel,
+                WaveletCoefficient<Integer, Double> topCoeff, List<Pair<Integer, Double>> borderTuples) {
+            //short circuit coefficient computation for 0
+            // no tuples in the range: a zero average is returned and nothing is passed to addElement
+            if (borderTuples.isEmpty()) {
+                Integer coeffIdx = ((topCoeff.index + 1) << (topCoeff.level - level));
+                return new WaveletCoefficient<Integer, Double>(0.0, level, coeffIdx);
+            }
+
+            Map<Integer, Double> newCoefs = new HashMap<>();
+            Double avg = 0.0;
+            // stays -1 (and avg stays 0.0) if level < 1, because the loop below does not run
+            Integer coeffIdx = -1;
+            for (int i = 1; i <= level; i++) {
+                newCoefs.clear();
+                avg = 0.0;
+                for (int j = 0; j < borderTuples.size(); j++) {
+                    Pair<Integer, Double> item = borderTuples.get(j);
+                    coeffIdx = ((topCoeff.index + 1) << (topCoeff.level - i)) + (item.getKey() >> 1);
+                    Double newValue = item.getValue() / 2.0;
+                    Double oldValue = newCoefs.containsKey(coeffIdx) ? newCoefs.get(coeffIdx) : 0;
+                    // odd offsets are subtracted, even offsets are added
+                    if ((item.getKey() & 0x1) == 1) {
+                        newCoefs.put(coeffIdx, (oldValue - newValue));
+                    } else {
+                        newCoefs.put(coeffIdx, (oldValue + newValue));
+                    }
+                    avg += newValue;
+                    // carry the halved value and the shifted offset over to the next level
+                    borderTuples.set(j, Pair.of(item.getKey() >> 1, newValue));
+                }
+
+                for (Entry<Integer, Double> e : newCoefs.entrySet())
+                    addElement(e.getKey(), e.getValue() / WaveletCoefficient.getNormalizationCoefficient(maxLevel, i));
+            }
+            // avg and coeffIdx hold the values of the last level processed
+            return new WaveletCoefficient<Integer, Double>(avg, level, coeffIdx);
+        }
+
+        /**
+         * Finishes the bulk load: the coefficient on top of {@code avgStack} is stored with index 0
+         * and the retained coefficients are written to disk. When nothing was pushed onto the stack
+         * (for instance because no tuple was added) only the persist step runs.
+         *
+         * @throws HyracksDataException
+         *             if the coefficients cannot be persisted
+         */
+        @Override
+        public void end() throws IndexException, HyracksDataException {
+            //assert(avgStack.size() == 1);
+            // TODO: the transform is not extended up to domainEnd when the last key lies before it;
+            // the former empty check dereferenced curr and failed with a NullPointerException
+            // when no tuple had been added
+            if (!avgStack.isEmpty()) {
+                WaveletCoefficient<Integer, Double> topCoeff = avgStack.pop();
+                topCoeff.index = 0;
+                addElement(topCoeff.index, topCoeff.value);
+            }
+
+            persistStatistics();
+        }
+
+        /**
+         * Writes the retained coefficients, sorted by key, to the data pages 1..numPages. Each
+         * coefficient is stored as an int key followed by a double value. A page is filled while at
+         * least 12 bytes remain between the buffer's position and its limit; coefficients left over
+         * after the last page are not written.
+         * NOTE(review): the relative puts start at whatever position the pinned page buffer has -
+         * presumably 0 for a fresh page, confirm.
+         *
+         * @throws HyracksDataException
+         *             propagated from the buffer cache
+         */
+        private void persistStatistics() throws HyracksDataException {
+            List<WaveletCoefficient<Integer, Double>> persistCoefficients = new ArrayList<>(coefficients);
+            // sorting coefficients according their keys
+            Collections.sort(persistCoefficients, new Comparator<WaveletCoefficient<Integer, Double>>() {
+                @Override
+                public int compare(WaveletCoefficient<Integer, Double> o1, WaveletCoefficient<Integer, Double> o2) {
+                    return o1.getKey().compareTo(o2.getKey());
+                }
+            });
+            Iterator<WaveletCoefficient<Integer, Double>> it = persistCoefficients.iterator();
+            // page 0 holds the metadata, data pages start at 1
+            int currentPageId = 1;
+            while (currentPageId <= numPages) {
+                ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
+                ByteBuffer buffer = page.getBuffer();
+                page.acquireWriteLatch();
+                try {
+                    // 4 bytes for the int key plus 8 bytes for the double value
+                    while (it.hasNext() && (buffer.limit() - buffer.position()) >= 4 + 8) {
+                        WaveletCoefficient<Integer, Double> coeff = it.next();
+                        buffer.putInt(coeff.getKey());
+                        buffer.putDouble(coeff.getValue());
+                    }
+                } finally {
+                    page.releaseWriteLatch(true);
+                    bufferCache.unpin(page);
+                }
+                ++currentPageId;
+            }
+
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelper.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelper.java
new file mode 100644
index 0000000..afaa949
--- /dev/null
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelper.java
@@ -0,0 +1,34 @@
+package org.apache.hyracks.storage.am.lsm.btree.dataflow;
+
+import java.util.List;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProviderFactory;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
+import org.apache.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
+import org.apache.hyracks.storage.am.lsm.common.dataflow.AbstractLSMIndexDataflowHelper;
+
+/**
+ * Base class for LSM B-tree dataflow helpers. On top of {@code AbstractLSMIndexDataflowHelper} it
+ * keeps the two statistics-related settings so that subclasses can read them.
+ */
+public abstract class AbstractLSMBTreeDataflowHelper extends AbstractLSMIndexDataflowHelper {
+
+    // whether statistics are to be collected
+    protected final boolean collectStatistics;
+    // factory for the value provider used by the statistics code
+    protected final IPrimitiveIntegerValueProviderFactory statsValueProviderFactory;
+
+    /**
+     * Forwards all index-related arguments to the superclass and stores the statistics settings.
+     *
+     * @param collectStatistics
+     *            whether statistics are to be collected
+     * @param statsValueProviderFactory
+     *            factory for the value provider used by the statistics code
+     */
+    public AbstractLSMBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
+            List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
+            ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
+            ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
+            ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories, int[] filterFields,
+            boolean durable, boolean collectStatistics,
+            IPrimitiveIntegerValueProviderFactory statsValueProviderFactory) {
+        super(opDesc, ctx, partition, virtualBufferCaches, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
+                ioScheduler, ioOpCallbackFactory, filterTypeTraits, filterCmpFactories, filterFields, durable);
+        this.collectStatistics = collectStatistics;
+        this.statsValueProviderFactory = statsValueProviderFactory;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelperFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelperFactory.java
new file mode 100644
index 0000000..3fa052f
--- /dev/null
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/AbstractLSMBTreeDataflowHelperFactory.java
@@ -0,0 +1,35 @@
+package org.apache.hyracks.storage.am.lsm.btree.dataflow;
+
+import java.util.Map;
+
+import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProviderFactory;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
+import org.apache.hyracks.storage.am.lsm.common.api.IVirtualBufferCacheProvider;
+import org.apache.hyracks.storage.am.lsm.common.dataflow.AbstractLSMIndexDataflowHelperFactory;
+
+/**
+ * Base class for LSM B-tree dataflow helper factories. On top of
+ * {@code AbstractLSMIndexDataflowHelperFactory} it keeps the two statistics-related settings so
+ * that subclasses can pass them on to the helpers they create.
+ */
+public abstract class AbstractLSMBTreeDataflowHelperFactory extends AbstractLSMIndexDataflowHelperFactory {
+
+    private static final long serialVersionUID = 1L;
+    // whether statistics are to be collected
+    protected final boolean collectStatistics;
+    // factory for the value provider used by the statistics code
+    protected final IPrimitiveIntegerValueProviderFactory statsValueProviderFactory;
+
+    /**
+     * Forwards all index-related arguments to the superclass and stores the statistics settings.
+     *
+     * @param collectStatistics
+     *            whether statistics are to be collected
+     * @param statsValueProviderFactory
+     *            factory for the value provider used by the statistics code
+     */
+    public AbstractLSMBTreeDataflowHelperFactory(IVirtualBufferCacheProvider virtualBufferCacheProvider,
+            ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties,
+            ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationSchedulerProvider ioSchedulerProvider,
+            ILSMIOOperationCallbackFactory ioOpCallbackFactory, double bloomFilterFalsePositiveRate,
+            ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories, int[] filterFields,
+            boolean durable, boolean collectStatistics,
+            IPrimitiveIntegerValueProviderFactory statsValueProviderFactory) {
+        super(virtualBufferCacheProvider, mergePolicyFactory, mergePolicyProperties, opTrackerFactory,
+                ioSchedulerProvider, ioOpCallbackFactory, bloomFilterFalsePositiveRate, filterTypeTraits,
+                filterCmpFactories, filterFields, durable);
+        this.collectStatistics = collectStatistics;
+        this.statsValueProviderFactory = statsValueProviderFactory;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelper.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelper.java
index 0e23fa4..0407257 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelper.java
@@ -23,6 +23,7 @@ import java.util.List;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.storage.am.common.api.IIndex;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProviderFactory;
 import org.apache.hyracks.storage.am.common.api.ITreeIndex;
 import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
 import org.apache.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
@@ -40,18 +41,22 @@ public class ExternalBTreeDataflowHelper extends LSMBTreeDataflowHelper {
     public ExternalBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
             double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy,
             ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
-            ILSMIOOperationCallbackFactory ioOpCallbackFactory, boolean needKeyDupCheck, int version, boolean durable) {
+            ILSMIOOperationCallbackFactory ioOpCallbackFactory, boolean needKeyDupCheck, int version, boolean durable,
+            boolean collectStatistics, IPrimitiveIntegerValueProviderFactory statsValueProviderFactory) {
         super(opDesc, ctx, partition, null, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory, ioScheduler,
-                ioOpCallbackFactory, false, null, null, null, null, durable);
+                ioOpCallbackFactory, false, null, null, null, null, durable, collectStatistics,
+                statsValueProviderFactory);
         this.version = version;
     }
 
     public ExternalBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
             List<IVirtualBufferCache> virtualBufferCaches, ILSMMergePolicy mergePolicy,
             ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
-            ILSMIOOperationCallbackFactory ioOpCallbackFactory, boolean needKeyDupCheck, int version, boolean durable) {
+            ILSMIOOperationCallbackFactory ioOpCallbackFactory, boolean needKeyDupCheck, int version, boolean durable,
+            boolean collectStatistics, IPrimitiveIntegerValueProviderFactory statsValueProviderFactory) {
         this(opDesc, ctx, partition, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy, opTrackerFactory,
-                ioScheduler, ioOpCallbackFactory, needKeyDupCheck, version, durable);
+                ioScheduler, ioOpCallbackFactory, needKeyDupCheck, version, durable, collectStatistics,
+                statsValueProviderFactory);
     }
 
     @Override
@@ -71,11 +76,12 @@ public class ExternalBTreeDataflowHelper extends LSMBTreeDataflowHelper {
     @Override
     public ITreeIndex createIndexInstance() throws HyracksDataException {
         AbstractTreeIndexOperatorDescriptor treeOpDesc = (AbstractTreeIndexOperatorDescriptor) opDesc;
-        return LSMBTreeUtils.createExternalBTree(file, opDesc.getStorageManager().getBufferCache(ctx), opDesc
-                .getStorageManager().getFileMapProvider(ctx), treeOpDesc.getTreeIndexTypeTraits(), treeOpDesc
-                .getTreeIndexComparatorFactories(), treeOpDesc.getTreeIndexBloomFilterKeyFields(),
+        return LSMBTreeUtils.createExternalBTree(file, opDesc.getStorageManager().getBufferCache(ctx),
+                opDesc.getStorageManager().getFileMapProvider(ctx), treeOpDesc.getTreeIndexTypeTraits(),
+                treeOpDesc.getTreeIndexComparatorFactories(), treeOpDesc.getTreeIndexBloomFilterKeyFields(),
                 bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory.getOperationTracker(ctx), ioScheduler,
-                ioOpCallbackFactory.createIOOperationCallback(), getVersion(), durable);
+                ioOpCallbackFactory.createIOOperationCallback(), getVersion(), durable, collectStatistics,
+                statsValueProviderFactory);
     }
 
     public int getVersion() {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/cf029cf3/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelperFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelperFactory.java
index a4a0792..c54c826 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/dataflow/ExternalBTreeDataflowHelperFactory.java
@@ -22,14 +22,14 @@ import java.util.Map;
 
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper;
+import org.apache.hyracks.storage.am.common.api.IPrimitiveIntegerValueProviderFactory;
 import org.apache.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
-import org.apache.hyracks.storage.am.lsm.common.dataflow.AbstractLSMIndexDataflowHelperFactory;
 
-public class ExternalBTreeDataflowHelperFactory extends AbstractLSMIndexDataflowHelperFactory {
+public class ExternalBTreeDataflowHelperFactory extends AbstractLSMBTreeDataflowHelperFactory {
 
     private static final long serialVersionUID = 1L;
 
@@ -38,9 +38,11 @@ public class ExternalBTreeDataflowHelperFactory extends AbstractLSMIndexDataflow
     public ExternalBTreeDataflowHelperFactory(ILSMMergePolicyFactory mergePolicyFactory,
             Map<String, String> mergePolicyProperties, ILSMOperationTrackerProvider opTrackerFactory,
             ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
-            double bloomFilterFalsePositiveRate, int version, boolean durable) {
+            double bloomFilterFalsePositiveRate, int version, boolean durable, boolean collectStatistics,
+            IPrimitiveIntegerValueProviderFactory statsValueProviderFactory) {
         super(null, mergePolicyFactory, mergePolicyProperties, opTrackerFactory, ioSchedulerProvider,
-                ioOpCallbackFactory, bloomFilterFalsePositiveRate, null, null, null, durable);
+                ioOpCallbackFactory, bloomFilterFalsePositiveRate, null, null, null, durable, collectStatistics,
+                statsValueProviderFactory);
         this.version = version;
     }
 
@@ -49,7 +51,8 @@ public class ExternalBTreeDataflowHelperFactory extends AbstractLSMIndexDataflow
             int partition) {
         return new ExternalBTreeDataflowHelper(opDesc, ctx, partition, bloomFilterFalsePositiveRate,
                 mergePolicyFactory.createMergePolicy(mergePolicyProperties, ctx), opTrackerFactory,
-                ioSchedulerProvider.getIOScheduler(ctx), ioOpCallbackFactory, false, version, durable);
+                ioSchedulerProvider.getIOScheduler(ctx), ioOpCallbackFactory, false, version, durable,
+                collectStatistics, statsValueProviderFactory);
     }
 
 }



Mime
View raw message