kylin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nju_y...@apache.org
Subject [1/2] kylin git commit: APACHE-KYLIN-2802: fix issue of CuboidStatsUtil.createAllDescendantsCache
Date Fri, 25 Aug 2017 03:28:01 GMT
Repository: kylin
Updated Branches:
  refs/heads/yaho-cube-planner e75770f65 -> c0a785d46


APACHE-KYLIN-2802: fix issue of CuboidStatsUtil.createAllDescendantsCache


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/abf4a97c
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/abf4a97c
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/abf4a97c

Branch: refs/heads/yaho-cube-planner
Commit: abf4a97cc4a7530dbdf51db4c51877bd1999fafd
Parents: e75770f
Author: Zhong <nju_yaho@apache.org>
Authored: Fri Aug 25 08:15:49 2017 +0800
Committer: Zhong <nju_yaho@apache.org>
Committed: Fri Aug 25 08:15:49 2017 +0800

----------------------------------------------------------------------
 .../cube/cuboid/algorithm/CuboidStats.java      |  27 +++-
 .../cube/cuboid/algorithm/CuboidStatsUtil.java  | 134 +++++++++++-------
 .../cuboid/algorithm/CuboidStatsUtilTest.java   |  45 ++++---
 .../engine/mr/common/CuboidRecommenderUtil.java |   2 +-
 .../engine/mr/common/CuboidStatsReaderUtil.java | 135 +++++++++++++++++++
 .../kylin/engine/mr/common/CuboidStatsUtil.java | 135 -------------------
 6 files changed, 273 insertions(+), 205 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
index 1775d5a..e4b0167 100755
--- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
@@ -18,6 +18,7 @@
 
 package org.apache.kylin.cube.cuboid.algorithm;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
@@ -120,7 +121,8 @@ public class CuboidStats {
     private ImmutableMap<Long, Double> cuboidHitProbabilityMap;
     private ImmutableMap<Long, Long> cuboidScanCountMap;
 
-    private ImmutableMap<Long, Set<Long>> allDescendantsCache;
+    private ImmutableMap<Long, List<Long>> directChildrenCache;
+    private Map<Long, Set<Long>> allDescendantsCache;
 
     private CuboidStats(String key, long baseCuboidId, Set<Long> mandatoryCuboids, Map<Long, Long> statistics,
             Map<Long, Double> size, Map<Long, Long> hitFrequencyMap, Map<Long, Map<Long, Long>> scanCountSourceMap) {
@@ -187,8 +189,10 @@ public class CuboidStats {
         }
         this.cuboidScanCountMap = ImmutableMap.<Long, Long> builder().putAll(tmpCuboidScanCountMap).build();
 
-        this.allDescendantsCache = ImmutableMap.<Long, Set<Long>> builder()
-                .putAll(CuboidStatsUtil.createAllDescendantsCache(statistics.keySet())).build();
+        this.directChildrenCache = ImmutableMap.<Long, List<Long>> builder()
+                .putAll(CuboidStatsUtil.createDirectChildrenCache(statistics.keySet())).build();
+
+        this.allDescendantsCache = Maps.newHashMap();
     }
 
     private long getExpScanCount(long sourceCuboid, Map<Long, Long> statistics,
@@ -216,11 +220,26 @@ public class CuboidStats {
     public Set<Long> getAllDescendants(long cuboid) {
         Set<Long> allDescendants = Sets.newLinkedHashSet();
         if (selectionCuboidSet.contains(cuboid)) {
-            return allDescendantsCache.get(cuboid);
+            if (allDescendantsCache.get(cuboid) != null) {
+                return allDescendantsCache.get(cuboid);
+            } else {
+                getAllDescendants(cuboid, allDescendants);
+                allDescendantsCache.put(cuboid, allDescendants);
+            }
         }
         return allDescendants;
     }
 
+    private void getAllDescendants(long cuboid, Set<Long> allDescendants) {
+        if (allDescendants.contains(cuboid)) {
+            return;
+        }
+        allDescendants.add(cuboid);
+        for (Long directChild : directChildrenCache.get(cuboid)) {
+            getAllDescendants(directChild, allDescendants);
+        }
+    }
+
     public Set<Long> getAllCuboidsForSelection() {
         return selectionCuboidSet;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
index 6d5bbe5..c2683df 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
@@ -20,13 +20,13 @@ package org.apache.kylin.cube.cuboid.algorithm;
 
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
@@ -102,62 +102,102 @@ public class CuboidStatsUtil {
         }
     }
 
-    /** Using dynamic programming to use extra space to reduce repetitive computation*/
-    public static Map<Long, Set<Long>> createAllDescendantsCache(final Set<Long> cuboidSet) {
-        List<Long> latticeCuboidList = Lists.newArrayList(cuboidSet);
-        Collections.sort(latticeCuboidList);
-
-        Map<Long, Set<Long>> allDescendantsCache = Maps.newHashMap();
-        Set<Long> preNoneDescendants = Sets.newHashSet();
-        for (int i = 0; i < latticeCuboidList.size(); i++) {
-            Long currentCuboid = latticeCuboidList.get(i);
-            Set<Long> descendants = Sets.newHashSet(currentCuboid);
-            Set<Long> curNoneDescendants = Sets.newHashSet();
-            if (i > 0) {
-                long preCuboid = latticeCuboidList.get(i - 1);
-                if (isDescendant(preCuboid, currentCuboid)) {
-                    descendants.addAll(allDescendantsCache.get(preCuboid));
-                } else {
-                    curNoneDescendants.add(preCuboid);
-                    for (long cuboidToCheck : allDescendantsCache.get(preCuboid)) {
-                        if (isDescendant(cuboidToCheck, currentCuboid)) {
-                            descendants.addAll(allDescendantsCache.get(cuboidToCheck));
-                        }
-                    }
-                }
-            }
-            for (long cuboidToCheck : preNoneDescendants) {
-                if (isDescendant(cuboidToCheck, currentCuboid)) {
-                    descendants.addAll(allDescendantsCache.get(cuboidToCheck));
-                } else {
-                    curNoneDescendants.add(cuboidToCheck);
+    public static Map<Long, List<Long>> createDirectChildrenCache(final Set<Long> cuboidSet) {
+        /**
+         * Sort the list by ascending order:
+         * */
+        final List<Long> cuboidList = Lists.newArrayList(cuboidSet);
+        Collections.sort(cuboidList);
+        /**
+         * Sort the list by ascending order:
+         * 1. the more bit count of its value, the bigger
+         * 2. the larger of its value, the bigger
+         * */
+        List<Integer> layerIdxList = Lists.newArrayListWithExpectedSize(cuboidList.size());
+        for (int i = 0; i < cuboidList.size(); i++) {
+            layerIdxList.add(i);
+        }
+        Collections.sort(layerIdxList, new Comparator<Integer>() {
+            @Override
+            public int compare(Integer i1, Integer i2) {
+                Long o1 = cuboidList.get(i1);
+                Long o2 = cuboidList.get(i2);
+                int nBitDiff = Long.bitCount(o1) - Long.bitCount(o2);
+                if (nBitDiff != 0) {
+                    return nBitDiff;
                 }
+                return Long.compare(o1, o2);
             }
+        });
+        /**
+         * Construct an index array for pointing the position in layerIdxList
+         * (layerCuboidList is for speeding up continuous iteration)
+         * */
+        int[] toLayerIdxArray = new int[layerIdxList.size()];
+        final List<Long> layerCuboidList = Lists.newArrayListWithExpectedSize(cuboidList.size());
+        for (int i = 0; i < layerIdxList.size(); i++) {
+            int cuboidIdx = layerIdxList.get(i);
+            toLayerIdxArray[cuboidIdx] = i;
+            layerCuboidList.add(cuboidList.get(cuboidIdx));
+        }
 
-            allDescendantsCache.put(currentCuboid, descendants);
-            preNoneDescendants = curNoneDescendants;
+        int[] previousLayerLastIdxArray = new int[layerIdxList.size()];
+        int currentBitCount = 0;
+        int previousLayerLastIdx = -1;
+        for (int i = 0; i < layerIdxList.size(); i++) {
+            int cuboidIdx = layerIdxList.get(i);
+            int nBits = Long.bitCount(cuboidList.get(cuboidIdx));
+            if (nBits > currentBitCount) {
+                currentBitCount = nBits;
+                previousLayerLastIdx = i - 1;
+            }
+            previousLayerLastIdxArray[i] = previousLayerLastIdx;
         }
 
-        return allDescendantsCache;
+        Map<Long, List<Long>> directChildrenCache = Maps.newHashMap();
+        for (int i = 0; i < cuboidList.size(); i++) {
+            Long currentCuboid = cuboidList.get(i);
+            LinkedList<Long> directChildren = Lists.newLinkedList();
+            int lastLayerIdx = previousLayerLastIdxArray[toLayerIdxArray[i]];
+            /**
+             * Choose one of the two scan strategies
+             * 1. cuboids are sorted by its value, like 1,2,3,4,...
+             * 2. cuboids are layered and sorted, like 1,2,4,8,...,3,5,...
+             * */
+            if (i - 1 <= lastLayerIdx) {
+                /**
+                 * 1. Adding cuboid by descending order
+                 * */
+                for (int j = i - 1; j >= 0; j--) {
+                    checkAndAddDirectChild(directChildren, currentCuboid, cuboidList.get(j));
+                }
+            } else {
+                /**
+                 * 1. Adding cuboid by descending order
+                 * 2. Check from lower cuboid layer
+                 * */
+                for (int j = lastLayerIdx; j >= 0; j--) {
+                    checkAndAddDirectChild(directChildren, currentCuboid, layerCuboidList.get(j));
+                }
+            }
+            directChildrenCache.put(currentCuboid, directChildren);
+        }
+        return directChildrenCache;
     }
 
-    @VisibleForTesting
-    static Map<Long, Set<Long>> createAllDescendantsCache2(final Set<Long> cuboidSet) {
-        List<Long> latticeCuboidList = Lists.newArrayList(cuboidSet);
-
-        Map<Long, Set<Long>> allDescendantsCache = Maps.newHashMap();
-        for (int i = 0; i < latticeCuboidList.size(); i++) {
-            Long currentCuboid = latticeCuboidList.get(i);
-            Set<Long> descendantSet = Sets.newHashSet(currentCuboid);
-            for (int j = 0; j < i; j++) {
-                Long checkCuboid = latticeCuboidList.get(j);
-                if (isDescendant(checkCuboid, currentCuboid)) {
-                    descendantSet.add(checkCuboid);
+    private static void checkAndAddDirectChild(List<Long> directChildren, Long currentCuboid, Long checkedCuboid) {
+        if (isDescendant(checkedCuboid, currentCuboid)) {
+            boolean ifDirectChild = true;
+            for (long directChild : directChildren) {
+                if (isDescendant(checkedCuboid, directChild)) {
+                    ifDirectChild = false;
+                    break;
                 }
             }
-            allDescendantsCache.put(currentCuboid, descendantSet);
+            if (ifDirectChild) {
+                directChildren.add(checkedCuboid);
+            }
         }
-        return allDescendantsCache;
     }
 
     public static boolean isDescendant(long cuboidToCheck, long parentCuboid) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
index 8e809cf..688b539 100644
--- a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
+++ b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
@@ -19,14 +19,15 @@
 package org.apache.kylin.cube.cuboid.algorithm;
 
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.concurrent.TimeUnit;
 
 import org.junit.Assert;
 import org.junit.Test;
 
 import com.google.common.base.Stopwatch;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
@@ -122,16 +123,16 @@ public class CuboidStatsUtilTest {
     }
 
     @Test
-    public void createAllDescendantsCacheTest() {
+    public void createDirectChildrenCacheTest() {
         Set<Long> cuboidSet = generateCuboidSet();
-        Map<Long, Set<Long>> allDescendantsCache = CuboidStatsUtil.createAllDescendantsCache(cuboidSet);
-
-        Assert.assertTrue(allDescendantsCache.get(255L).containsAll(cuboidSet));
-
-        Assert.assertTrue(allDescendantsCache.get(239L).size() == 5);
-
-        Assert.assertTrue(allDescendantsCache.get(50L).containsAll(Sets.newHashSet(50L, 2L)));
-        Assert.assertTrue(!allDescendantsCache.get(50L).contains(4L));
+        Map<Long, List<Long>> directChildrenCache = CuboidStatsUtil.createDirectChildrenCache(cuboidSet);
+
+        Assert.assertTrue(directChildrenCache.get(255L).containsAll(Lists.newArrayList(239L, 159L, 50L)));
+        Assert.assertTrue(directChildrenCache.get(159L).contains(6L));
+        Assert.assertTrue(directChildrenCache.get(50L).contains(2L));
+        Assert.assertTrue(directChildrenCache.get(239L).contains(199L));
+        Assert.assertTrue(directChildrenCache.get(199L).contains(6L));
+        Assert.assertTrue(directChildrenCache.get(6L).containsAll(Lists.newArrayList(4L, 2L)));
     }
 
     private Set<Long> generateMassCuboidSet() {
@@ -144,18 +145,26 @@ public class CuboidStatsUtilTest {
     }
 
     @Test
-    public void createAllDescendantsCacheStressTest() {
+    public void createDirectChildrenCacheStressTest() {
         Stopwatch sw = new Stopwatch();
         sw.start();
         Set<Long> cuboidSet = generateMassCuboidSet();
-        System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsed(TimeUnit.MILLISECONDS));
-        sw.reset();
-        sw.start();
-        CuboidStatsUtil.createAllDescendantsCache(cuboidSet);
-        System.out.println("Time elapsed for creating descendants cache: " + sw.elapsed(TimeUnit.MILLISECONDS));
+        System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsedMillis());
         sw.reset();
         sw.start();
-        CuboidStatsUtil.createAllDescendantsCache2(cuboidSet);
-        System.out.println("Time elapsed for creating descendants cache2: " + sw.elapsed(TimeUnit.MILLISECONDS));
+        checkDirectChildrenCacheStressTest(CuboidStatsUtil.createDirectChildrenCache(cuboidSet));
+        System.out.println("Time elapsed for creating direct children cache: " + sw.elapsedMillis());
+        sw.stop();
+    }
+
+    private void checkDirectChildrenCacheStressTest(Map<Long, List<Long>> directChildrenCache) {
+        for (Map.Entry<Long, List<Long>> entry : directChildrenCache.entrySet()) {
+            if (Long.bitCount(entry.getKey()) == 1) {
+                Assert.assertTrue("Check for cuboid " + entry.getKey(), entry.getValue().size() == 0);
+            } else {
+                Assert.assertTrue("Check for cuboid " + entry.getKey(),
+                        Long.bitCount(entry.getKey()) == entry.getValue().size());
+            }
+        }
     }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
index a0b20fa..ba3f023 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
@@ -62,7 +62,7 @@ public class CuboidRecommenderUtil {
     public static Map<Long, Long> getRecommendCuboidList(CubeInstance cube, Map<Long, Long> hitFrequencyMap,
             Map<Long, Map<Long, Long>> rollingUpCountSourceMap) throws IOException {
 
-        Pair<Map<Long, Long>, Map<Long, Double>> statsPair = CuboidStatsUtil
+        Pair<Map<Long, Long>, Map<Long, Double>> statsPair = CuboidStatsReaderUtil
                 .readCuboidStatsAndSizeFromCube(cube.getCuboidScheduler().getAllCuboidIds(), cube);
 
         String key = cube.getName();

http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java
new file mode 100644
index 0000000..aaf9aa3
--- /dev/null
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.engine.mr.common;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.cube.CubeInstance;
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.measure.hllc.HLLCounter;
+import org.apache.kylin.metadata.model.SegmentStatusEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+public class CuboidStatsReaderUtil {
+
+    private static final Logger logger = LoggerFactory.getLogger(CuboidStatsReaderUtil.class);
+
+    public static Map<Long, Long> readCuboidStatsFromCube(Set<Long> cuboidIds, CubeInstance cubeInstance)
+            throws IOException {
+        Map<Long, Long> statisticsMerged = readCuboidStatsAndSizeFromCube(cuboidIds, cubeInstance).getFirst();
+        return statisticsMerged.isEmpty() ? null : statisticsMerged;
+    }
+
+    public static Pair<Map<Long, Long>, Map<Long, Double>> readCuboidStatsAndSizeFromCube(Set<Long> cuboidIds,
+            CubeInstance cube) throws IOException {
+        Preconditions.checkNotNull(cuboidIds, "The cuboid set can not be null");
+        Preconditions.checkNotNull(cube, "The cube instance can not be null");
+
+        List<CubeSegment> segmentList = cube.getSegments(SegmentStatusEnum.READY);
+        Map<Long, Long> statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+        Map<Long, Double> sizeMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+        readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, sizeMerged);
+        return new Pair<>(statisticsMerged, sizeMerged);
+    }
+
+    public static Map<Long, Long> readCuboidStatsFromSegments(Set<Long> cuboidIds, List<CubeSegment> segmentList)
+            throws IOException {
+        Map<Long, Long> statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+        readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged,
+                Maps.<Long, Double> newHashMapWithExpectedSize(cuboidIds.size()));
+        return statisticsMerged.isEmpty() ? null : statisticsMerged;
+    }
+
+    private static void readCuboidStatsFromSegments(Set<Long> cuboidSet, List<CubeSegment> segmentList,
+            final Map<Long, Long> statisticsMerged, final Map<Long, Double> sizeMerged) throws IOException {
+        if (segmentList == null || segmentList.isEmpty()) {
+            return;
+        }
+        int nSegment = segmentList.size();
+
+        Map<Long, HLLCounter> cuboidHLLMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size());
+        Map<Long, Double> sizeMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size());
+        for (CubeSegment pSegment : segmentList) {
+            CubeStatsReader pReader = new CubeStatsReader(pSegment, pSegment.getConfig());
+            Map<Long, HLLCounter> pHLLMap = pReader.getCuboidRowEstimatesHLLOrigin();
+            if (pHLLMap == null || pHLLMap.isEmpty()) {
+                logger.info("Cuboid Statistics for segment " + pSegment.getName() + " is not enabled.");
+                nSegment--;
+                continue;
+            }
+            Map<Long, Double> pSizeMap = pReader.getCuboidSizeMap();
+            for (Long pCuboid : cuboidSet) {
+                HLLCounter pInnerHLL = pHLLMap.get(pCuboid);
+                Preconditions.checkNotNull(pInnerHLL, "statistics should exist for cuboid " + pCuboid + " of segment "
+                        + pSegment.getCubeDesc().getName() + "[" + pSegment.getName() + "]");
+                if (cuboidHLLMapMerged.get(pCuboid) != null) {
+                    cuboidHLLMapMerged.get(pCuboid).merge(pInnerHLL);
+                } else {
+                    cuboidHLLMapMerged.put(pCuboid, pInnerHLL);
+                }
+
+                Double pSize = sizeMapMerged.get(pCuboid);
+                sizeMapMerged.put(pCuboid, pSize == null ? pSizeMap.get(pCuboid) : pSizeMap.get(pCuboid) + pSize);
+            }
+        }
+
+        if (nSegment < 1) {
+            return;
+        }
+        for (Long pCuboid : cuboidSet) {
+            statisticsMerged.put(pCuboid, cuboidHLLMapMerged.get(pCuboid).getCountEstimate() / nSegment);
+            sizeMerged.put(pCuboid, sizeMapMerged.get(pCuboid) / nSegment);
+        }
+    }
+
+    public static Map<Long, Long> readCuboidStatsFromSegment(Set<Long> cuboidIds, CubeSegment cubeSegment)
+            throws IOException {
+        if (cubeSegment == null) {
+            logger.warn("The cube segment can not be " + null);
+            return null;
+        }
+
+        CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cubeSegment.getConfig());
+        if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
+                || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
+            logger.info("Cuboid Statistics is not enabled.");
+            return null;
+        }
+
+        Map<Long, Long> cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL();
+        Map<Long, Long> cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size());
+        for (Long cuboid : cuboidIds) {
+            Long rowEstimate = cuboidsWithStatsAll.get(cuboid);
+            if (rowEstimate == null) {
+                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
+            } else {
+                cuboidsWithStats.put(cuboid, rowEstimate);
+            }
+        }
+        return cuboidsWithStats;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java
deleted file mode 100644
index 5bb4179..0000000
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.engine.mr.common;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.kylin.common.util.Pair;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeSegment;
-import org.apache.kylin.measure.hllc.HLLCounter;
-import org.apache.kylin.metadata.model.SegmentStatusEnum;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Maps;
-
-public class CuboidStatsUtil {
-
-    private static final Logger logger = LoggerFactory.getLogger(CuboidStatsUtil.class);
-
-    public static Map<Long, Long> readCuboidStatsFromCube(Set<Long> cuboidIds, CubeInstance cubeInstance)
-            throws IOException {
-        Map<Long, Long> statisticsMerged = readCuboidStatsAndSizeFromCube(cuboidIds, cubeInstance).getFirst();
-        return statisticsMerged.isEmpty() ? null : statisticsMerged;
-    }
-
-    public static Pair<Map<Long, Long>, Map<Long, Double>> readCuboidStatsAndSizeFromCube(Set<Long> cuboidIds,
-            CubeInstance cube) throws IOException {
-        Preconditions.checkNotNull(cuboidIds, "The cuboid set can not be null");
-        Preconditions.checkNotNull(cube, "The cube instance can not be null");
-
-        List<CubeSegment> segmentList = cube.getSegments(SegmentStatusEnum.READY);
-        Map<Long, Long> statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
-        Map<Long, Double> sizeMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
-        readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, sizeMerged);
-        return new Pair<>(statisticsMerged, sizeMerged);
-    }
-
-    public static Map<Long, Long> readCuboidStatsFromSegments(Set<Long> cuboidIds, List<CubeSegment> segmentList)
-            throws IOException {
-        Map<Long, Long> statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size());
-        readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged,
-                Maps.<Long, Double> newHashMapWithExpectedSize(cuboidIds.size()));
-        return statisticsMerged.isEmpty() ? null : statisticsMerged;
-    }
-
-    private static void readCuboidStatsFromSegments(Set<Long> cuboidSet, List<CubeSegment> segmentList,
-            final Map<Long, Long> statisticsMerged, final Map<Long, Double> sizeMerged) throws IOException {
-        if (segmentList == null || segmentList.isEmpty()) {
-            return;
-        }
-        int nSegment = segmentList.size();
-
-        Map<Long, HLLCounter> cuboidHLLMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size());
-        Map<Long, Double> sizeMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size());
-        for (CubeSegment pSegment : segmentList) {
-            CubeStatsReader pReader = new CubeStatsReader(pSegment, pSegment.getConfig());
-            Map<Long, HLLCounter> pHLLMap = pReader.getCuboidRowEstimatesHLLOrigin();
-            if (pHLLMap == null || pHLLMap.isEmpty()) {
-                logger.info("Cuboid Statistics for segment " + pSegment.getName() + " is not enabled.");
-                nSegment--;
-                continue;
-            }
-            Map<Long, Double> pSizeMap = pReader.getCuboidSizeMap();
-            for (Long pCuboid : cuboidSet) {
-                HLLCounter pInnerHLL = pHLLMap.get(pCuboid);
-                Preconditions.checkNotNull(pInnerHLL, "statistics should exist for cuboid " + pCuboid + " of segment "
-                        + pSegment.getCubeDesc().getName() + "[" + pSegment.getName() + "]");
-                if (cuboidHLLMapMerged.get(pCuboid) != null) {
-                    cuboidHLLMapMerged.get(pCuboid).merge(pInnerHLL);
-                } else {
-                    cuboidHLLMapMerged.put(pCuboid, pInnerHLL);
-                }
-
-                Double pSize = sizeMapMerged.get(pCuboid);
-                sizeMapMerged.put(pCuboid, pSize == null ? pSizeMap.get(pCuboid) : pSizeMap.get(pCuboid) + pSize);
-            }
-        }
-
-        if (nSegment < 1) {
-            return;
-        }
-        for (Long pCuboid : cuboidSet) {
-            statisticsMerged.put(pCuboid, cuboidHLLMapMerged.get(pCuboid).getCountEstimate() / nSegment);
-            sizeMerged.put(pCuboid, sizeMapMerged.get(pCuboid) / nSegment);
-        }
-    }
-
-    public static Map<Long, Long> readCuboidStatsFromSegment(Set<Long> cuboidIds, CubeSegment cubeSegment)
-            throws IOException {
-        if (cubeSegment == null) {
-            logger.warn("The cube segment can not be " + null);
-            return null;
-        }
-
-        CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cubeSegment.getConfig());
-        if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
-                || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
-            logger.info("Cuboid Statistics is not enabled.");
-            return null;
-        }
-
-        Map<Long, Long> cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL();
-        Map<Long, Long> cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size());
-        for (Long cuboid : cuboidIds) {
-            Long rowEstimate = cuboidsWithStatsAll.get(cuboid);
-            if (rowEstimate == null) {
-                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
-            } else {
-                cuboidsWithStats.put(cuboid, rowEstimate);
-            }
-        }
-        return cuboidsWithStats;
-    }
-}


Mime
View raw message