Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 2239A200D0F for ; Fri, 25 Aug 2017 05:28:05 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 20E3D16A2A2; Fri, 25 Aug 2017 03:28:05 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id BE841164863 for ; Fri, 25 Aug 2017 05:28:03 +0200 (CEST) Received: (qmail 37228 invoked by uid 500); 25 Aug 2017 03:28:02 -0000 Mailing-List: contact commits-help@kylin.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kylin.apache.org Delivered-To: mailing list commits@kylin.apache.org Received: (qmail 37219 invoked by uid 99); 25 Aug 2017 03:28:01 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 25 Aug 2017 03:28:01 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A0F29DFA24; Fri, 25 Aug 2017 03:28:01 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: nju_yaho@apache.org To: commits@kylin.apache.org Date: Fri, 25 Aug 2017 03:28:01 -0000 Message-Id: <7d2c66e9f7f1456491b6a829f7785d82@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] kylin git commit: APACHE-KYLIN-2802: fix issue of CuboidStatsUtil.createAllDescendantsCache archived-at: Fri, 25 Aug 2017 03:28:05 -0000 Repository: kylin Updated Branches: refs/heads/yaho-cube-planner e75770f65 -> c0a785d46 APACHE-KYLIN-2802: fix issue of CuboidStatsUtil.createAllDescendantsCache Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/abf4a97c Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/abf4a97c Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/abf4a97c Branch: refs/heads/yaho-cube-planner Commit: abf4a97cc4a7530dbdf51db4c51877bd1999fafd Parents: e75770f Author: Zhong Authored: Fri Aug 25 08:15:49 2017 +0800 Committer: Zhong Committed: Fri Aug 25 08:15:49 2017 +0800 ---------------------------------------------------------------------- .../cube/cuboid/algorithm/CuboidStats.java | 27 +++- .../cube/cuboid/algorithm/CuboidStatsUtil.java | 134 +++++++++++------- .../cuboid/algorithm/CuboidStatsUtilTest.java | 45 ++++--- .../engine/mr/common/CuboidRecommenderUtil.java | 2 +- .../engine/mr/common/CuboidStatsReaderUtil.java | 135 +++++++++++++++++++ .../kylin/engine/mr/common/CuboidStatsUtil.java | 135 ------------------- 6 files changed, 273 insertions(+), 205 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java index 1775d5a..e4b0167 100755 --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java @@ -18,6 +18,7 @@ package org.apache.kylin.cube.cuboid.algorithm; +import java.util.List; import java.util.Map; import java.util.Set; @@ -120,7 +121,8 @@ public class CuboidStats { private ImmutableMap cuboidHitProbabilityMap; private ImmutableMap cuboidScanCountMap; - private ImmutableMap> allDescendantsCache; + private ImmutableMap> directChildrenCache; + private Map> allDescendantsCache; private CuboidStats(String key, long baseCuboidId, Set mandatoryCuboids, Map statistics, Map size, Map hitFrequencyMap, Map> scanCountSourceMap) { @@ -187,8 +189,10 @@ public class CuboidStats { } this.cuboidScanCountMap = ImmutableMap. builder().putAll(tmpCuboidScanCountMap).build(); - this.allDescendantsCache = ImmutableMap.> builder() - .putAll(CuboidStatsUtil.createAllDescendantsCache(statistics.keySet())).build(); + this.directChildrenCache = ImmutableMap.> builder() + .putAll(CuboidStatsUtil.createDirectChildrenCache(statistics.keySet())).build(); + + this.allDescendantsCache = Maps.newHashMap(); } private long getExpScanCount(long sourceCuboid, Map statistics, @@ -216,11 +220,26 @@ public class CuboidStats { public Set getAllDescendants(long cuboid) { Set allDescendants = Sets.newLinkedHashSet(); if (selectionCuboidSet.contains(cuboid)) { - return allDescendantsCache.get(cuboid); + if (allDescendantsCache.get(cuboid) != null) { + return allDescendantsCache.get(cuboid); + } else { + getAllDescendants(cuboid, allDescendants); + allDescendantsCache.put(cuboid, allDescendants); + } } return allDescendants; } + private void getAllDescendants(long cuboid, Set allDescendants) { + if (allDescendants.contains(cuboid)) { + return; + } + allDescendants.add(cuboid); + for (Long directChild : directChildrenCache.get(cuboid)) { + getAllDescendants(directChild, allDescendants); + } + } + public Set getAllCuboidsForSelection() { return selectionCuboidSet; } http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java index 6d5bbe5..c2683df 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java @@ -20,13 +20,13 @@ package org.apache.kylin.cube.cuboid.algorithm; import java.util.Collections; import java.util.Comparator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; -import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -102,62 +102,102 @@ public class CuboidStatsUtil { } } - /** Using dynamic programming to use extra space to reduce repetitive computation*/ - public static Map> createAllDescendantsCache(final Set cuboidSet) { - List latticeCuboidList = Lists.newArrayList(cuboidSet); - Collections.sort(latticeCuboidList); - - Map> allDescendantsCache = Maps.newHashMap(); - Set preNoneDescendants = Sets.newHashSet(); - for (int i = 0; i < latticeCuboidList.size(); i++) { - Long currentCuboid = latticeCuboidList.get(i); - Set descendants = Sets.newHashSet(currentCuboid); - Set curNoneDescendants = Sets.newHashSet(); - if (i > 0) { - long preCuboid = latticeCuboidList.get(i - 1); - if (isDescendant(preCuboid, currentCuboid)) { - descendants.addAll(allDescendantsCache.get(preCuboid)); - } else { - curNoneDescendants.add(preCuboid); - for (long cuboidToCheck : allDescendantsCache.get(preCuboid)) { - if (isDescendant(cuboidToCheck, currentCuboid)) { - descendants.addAll(allDescendantsCache.get(cuboidToCheck)); - } - } - } - } - for (long cuboidToCheck : preNoneDescendants) { - if (isDescendant(cuboidToCheck, currentCuboid)) { - descendants.addAll(allDescendantsCache.get(cuboidToCheck)); - } else { - curNoneDescendants.add(cuboidToCheck); + public static Map> createDirectChildrenCache(final Set cuboidSet) { + /** + * Sort the list by ascending order: + * */ + final List cuboidList = Lists.newArrayList(cuboidSet); + Collections.sort(cuboidList); + /** + * Sort the list by ascending order: + * 1. the more bit count of its value, the bigger + * 2. the larger of its value, the bigger + * */ + List layerIdxList = Lists.newArrayListWithExpectedSize(cuboidList.size()); + for (int i = 0; i < cuboidList.size(); i++) { + layerIdxList.add(i); + } + Collections.sort(layerIdxList, new Comparator() { + @Override + public int compare(Integer i1, Integer i2) { + Long o1 = cuboidList.get(i1); + Long o2 = cuboidList.get(i2); + int nBitDiff = Long.bitCount(o1) - Long.bitCount(o2); + if (nBitDiff != 0) { + return nBitDiff; } + return Long.compare(o1, o2); } + }); + /** + * Construct an index array for pointing the position in layerIdxList + * (layerCuboidList is for speeding up continuous iteration) + * */ + int[] toLayerIdxArray = new int[layerIdxList.size()]; + final List layerCuboidList = Lists.newArrayListWithExpectedSize(cuboidList.size()); + for (int i = 0; i < layerIdxList.size(); i++) { + int cuboidIdx = layerIdxList.get(i); + toLayerIdxArray[cuboidIdx] = i; + layerCuboidList.add(cuboidList.get(cuboidIdx)); + } - allDescendantsCache.put(currentCuboid, descendants); - preNoneDescendants = curNoneDescendants; + int[] previousLayerLastIdxArray = new int[layerIdxList.size()]; + int currentBitCount = 0; + int previousLayerLastIdx = -1; + for (int i = 0; i < layerIdxList.size(); i++) { + int cuboidIdx = layerIdxList.get(i); + int nBits = Long.bitCount(cuboidList.get(cuboidIdx)); + if (nBits > currentBitCount) { + currentBitCount = nBits; + previousLayerLastIdx = i - 1; + } + previousLayerLastIdxArray[i] = previousLayerLastIdx; } - return allDescendantsCache; + Map> directChildrenCache = Maps.newHashMap(); + for (int i = 0; i < cuboidList.size(); i++) { + Long currentCuboid = cuboidList.get(i); + LinkedList directChildren = Lists.newLinkedList(); + int lastLayerIdx = previousLayerLastIdxArray[toLayerIdxArray[i]]; + /** + * Choose one of the two scan strategies + * 1. cuboids are sorted by its value, like 1,2,3,4,... + * 2. cuboids are layered and sorted, like 1,2,4,8,...,3,5,... + * */ + if (i - 1 <= lastLayerIdx) { + /** + * 1. Adding cuboid by descending order + * */ + for (int j = i - 1; j >= 0; j--) { + checkAndAddDirectChild(directChildren, currentCuboid, cuboidList.get(j)); + } + } else { + /** + * 1. Adding cuboid by descending order + * 2. Check from lower cuboid layer + * */ + for (int j = lastLayerIdx; j >= 0; j--) { + checkAndAddDirectChild(directChildren, currentCuboid, layerCuboidList.get(j)); + } + } + directChildrenCache.put(currentCuboid, directChildren); + } + return directChildrenCache; } - @VisibleForTesting - static Map> createAllDescendantsCache2(final Set cuboidSet) { - List latticeCuboidList = Lists.newArrayList(cuboidSet); - - Map> allDescendantsCache = Maps.newHashMap(); - for (int i = 0; i < latticeCuboidList.size(); i++) { - Long currentCuboid = latticeCuboidList.get(i); - Set descendantSet = Sets.newHashSet(currentCuboid); - for (int j = 0; j < i; j++) { - Long checkCuboid = latticeCuboidList.get(j); - if (isDescendant(checkCuboid, currentCuboid)) { - descendantSet.add(checkCuboid); + private static void checkAndAddDirectChild(List directChildren, Long currentCuboid, Long checkedCuboid) { + if (isDescendant(checkedCuboid, currentCuboid)) { + boolean ifDirectChild = true; + for (long directChild : directChildren) { + if (isDescendant(checkedCuboid, directChild)) { + ifDirectChild = false; + break; } } - allDescendantsCache.put(currentCuboid, descendantSet); + if (ifDirectChild) { + directChildren.add(checkedCuboid); + } } - return allDescendantsCache; } public static boolean isDescendant(long cuboidToCheck, long parentCuboid) { http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java index 8e809cf..688b539 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java @@ -19,14 +19,15 @@ package org.apache.kylin.cube.cuboid.algorithm; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.TimeUnit; import org.junit.Assert; import org.junit.Test; import com.google.common.base.Stopwatch; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -122,16 +123,16 @@ public class CuboidStatsUtilTest { } @Test - public void createAllDescendantsCacheTest() { + public void createDirectChildrenCacheTest() { Set cuboidSet = generateCuboidSet(); - Map> allDescendantsCache = CuboidStatsUtil.createAllDescendantsCache(cuboidSet); - - Assert.assertTrue(allDescendantsCache.get(255L).containsAll(cuboidSet)); - - Assert.assertTrue(allDescendantsCache.get(239L).size() == 5); - - Assert.assertTrue(allDescendantsCache.get(50L).containsAll(Sets.newHashSet(50L, 2L))); - Assert.assertTrue(!allDescendantsCache.get(50L).contains(4L)); + Map> directChildrenCache = CuboidStatsUtil.createDirectChildrenCache(cuboidSet); + + Assert.assertTrue(directChildrenCache.get(255L).containsAll(Lists.newArrayList(239L, 159L, 50L))); + Assert.assertTrue(directChildrenCache.get(159L).contains(6L)); + Assert.assertTrue(directChildrenCache.get(50L).contains(2L)); + Assert.assertTrue(directChildrenCache.get(239L).contains(199L)); + Assert.assertTrue(directChildrenCache.get(199L).contains(6L)); + Assert.assertTrue(directChildrenCache.get(6L).containsAll(Lists.newArrayList(4L, 2L))); } private Set generateMassCuboidSet() { @@ -144,18 +145,26 @@ public class CuboidStatsUtilTest { } @Test - public void createAllDescendantsCacheStressTest() { + public void createDirectChildrenCacheStressTest() { Stopwatch sw = new Stopwatch(); sw.start(); Set cuboidSet = generateMassCuboidSet(); - System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsed(TimeUnit.MILLISECONDS)); - sw.reset(); - sw.start(); - CuboidStatsUtil.createAllDescendantsCache(cuboidSet); - System.out.println("Time elapsed for creating descendants cache: " + sw.elapsed(TimeUnit.MILLISECONDS)); + System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsedMillis()); sw.reset(); sw.start(); - CuboidStatsUtil.createAllDescendantsCache2(cuboidSet); - System.out.println("Time elapsed for creating descendants cache2: " + sw.elapsed(TimeUnit.MILLISECONDS)); + checkDirectChildrenCacheStressTest(CuboidStatsUtil.createDirectChildrenCache(cuboidSet)); + System.out.println("Time elapsed for creating direct children cache: " + sw.elapsedMillis()); + sw.stop(); + } + + private void checkDirectChildrenCacheStressTest(Map> directChildrenCache) { + for (Map.Entry> entry : directChildrenCache.entrySet()) { + if (Long.bitCount(entry.getKey()) == 1) { + Assert.assertTrue("Check for cuboid " + entry.getKey(), entry.getValue().size() == 0); + } else { + Assert.assertTrue("Check for cuboid " + entry.getKey(), + Long.bitCount(entry.getKey()) == entry.getValue().size()); + } + } } } http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java index a0b20fa..ba3f023 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java @@ -62,7 +62,7 @@ public class CuboidRecommenderUtil { public static Map getRecommendCuboidList(CubeInstance cube, Map hitFrequencyMap, Map> rollingUpCountSourceMap) throws IOException { - Pair, Map> statsPair = CuboidStatsUtil + Pair, Map> statsPair = CuboidStatsReaderUtil .readCuboidStatsAndSizeFromCube(cube.getCuboidScheduler().getAllCuboidIds(), cube); String key = cube.getName(); http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java new file mode 100644 index 0000000..aaf9aa3 --- /dev/null +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsReaderUtil.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.engine.mr.common; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.cube.CubeInstance; +import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.measure.hllc.HLLCounter; +import org.apache.kylin.metadata.model.SegmentStatusEnum; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +public class CuboidStatsReaderUtil { + + private static final Logger logger = LoggerFactory.getLogger(CuboidStatsReaderUtil.class); + + public static Map readCuboidStatsFromCube(Set cuboidIds, CubeInstance cubeInstance) + throws IOException { + Map statisticsMerged = readCuboidStatsAndSizeFromCube(cuboidIds, cubeInstance).getFirst(); + return statisticsMerged.isEmpty() ? null : statisticsMerged; + } + + public static Pair, Map> readCuboidStatsAndSizeFromCube(Set cuboidIds, + CubeInstance cube) throws IOException { + Preconditions.checkNotNull(cuboidIds, "The cuboid set can not be null"); + Preconditions.checkNotNull(cube, "The cube instance can not be null"); + + List segmentList = cube.getSegments(SegmentStatusEnum.READY); + Map statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size()); + Map sizeMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size()); + readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, sizeMerged); + return new Pair<>(statisticsMerged, sizeMerged); + } + + public static Map readCuboidStatsFromSegments(Set cuboidIds, List segmentList) + throws IOException { + Map statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size()); + readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, + Maps. newHashMapWithExpectedSize(cuboidIds.size())); + return statisticsMerged.isEmpty() ? null : statisticsMerged; + } + + private static void readCuboidStatsFromSegments(Set cuboidSet, List segmentList, + final Map statisticsMerged, final Map sizeMerged) throws IOException { + if (segmentList == null || segmentList.isEmpty()) { + return; + } + int nSegment = segmentList.size(); + + Map cuboidHLLMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size()); + Map sizeMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size()); + for (CubeSegment pSegment : segmentList) { + CubeStatsReader pReader = new CubeStatsReader(pSegment, pSegment.getConfig()); + Map pHLLMap = pReader.getCuboidRowEstimatesHLLOrigin(); + if (pHLLMap == null || pHLLMap.isEmpty()) { + logger.info("Cuboid Statistics for segment " + pSegment.getName() + " is not enabled."); + nSegment--; + continue; + } + Map pSizeMap = pReader.getCuboidSizeMap(); + for (Long pCuboid : cuboidSet) { + HLLCounter pInnerHLL = pHLLMap.get(pCuboid); + Preconditions.checkNotNull(pInnerHLL, "statistics should exist for cuboid " + pCuboid + " of segment " + + pSegment.getCubeDesc().getName() + "[" + pSegment.getName() + "]"); + if (cuboidHLLMapMerged.get(pCuboid) != null) { + cuboidHLLMapMerged.get(pCuboid).merge(pInnerHLL); + } else { + cuboidHLLMapMerged.put(pCuboid, pInnerHLL); + } + + Double pSize = sizeMapMerged.get(pCuboid); + sizeMapMerged.put(pCuboid, pSize == null ? pSizeMap.get(pCuboid) : pSizeMap.get(pCuboid) + pSize); + } + } + + if (nSegment < 1) { + return; + } + for (Long pCuboid : cuboidSet) { + statisticsMerged.put(pCuboid, cuboidHLLMapMerged.get(pCuboid).getCountEstimate() / nSegment); + sizeMerged.put(pCuboid, sizeMapMerged.get(pCuboid) / nSegment); + } + } + + public static Map readCuboidStatsFromSegment(Set cuboidIds, CubeSegment cubeSegment) + throws IOException { + if (cubeSegment == null) { + logger.warn("The cube segment can not be " + null); + return null; + } + + CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cubeSegment.getConfig()); + if (cubeStatsReader.getCuboidRowEstimatesHLL() == null + || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) { + logger.info("Cuboid Statistics is not enabled."); + return null; + } + + Map cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL(); + Map cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size()); + for (Long cuboid : cuboidIds) { + Long rowEstimate = cuboidsWithStatsAll.get(cuboid); + if (rowEstimate == null) { + logger.warn("Cannot get the row count stats for cuboid " + cuboid); + } else { + cuboidsWithStats.put(cuboid, rowEstimate); + } + } + return cuboidsWithStats; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/abf4a97c/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java deleted file mode 100644 index 5bb4179..0000000 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidStatsUtil.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.engine.mr.common; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.kylin.common.util.Pair; -import org.apache.kylin.cube.CubeInstance; -import org.apache.kylin.cube.CubeSegment; -import org.apache.kylin.measure.hllc.HLLCounter; -import org.apache.kylin.metadata.model.SegmentStatusEnum; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; - -public class CuboidStatsUtil { - - private static final Logger logger = LoggerFactory.getLogger(CuboidStatsUtil.class); - - public static Map readCuboidStatsFromCube(Set cuboidIds, CubeInstance cubeInstance) - throws IOException { - Map statisticsMerged = readCuboidStatsAndSizeFromCube(cuboidIds, cubeInstance).getFirst(); - return statisticsMerged.isEmpty() ? null : statisticsMerged; - } - - public static Pair, Map> readCuboidStatsAndSizeFromCube(Set cuboidIds, - CubeInstance cube) throws IOException { - Preconditions.checkNotNull(cuboidIds, "The cuboid set can not be null"); - Preconditions.checkNotNull(cube, "The cube instance can not be null"); - - List segmentList = cube.getSegments(SegmentStatusEnum.READY); - Map statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size()); - Map sizeMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size()); - readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, sizeMerged); - return new Pair<>(statisticsMerged, sizeMerged); - } - - public static Map readCuboidStatsFromSegments(Set cuboidIds, List segmentList) - throws IOException { - Map statisticsMerged = Maps.newHashMapWithExpectedSize(cuboidIds.size()); - readCuboidStatsFromSegments(cuboidIds, segmentList, statisticsMerged, - Maps. newHashMapWithExpectedSize(cuboidIds.size())); - return statisticsMerged.isEmpty() ? null : statisticsMerged; - } - - private static void readCuboidStatsFromSegments(Set cuboidSet, List segmentList, - final Map statisticsMerged, final Map sizeMerged) throws IOException { - if (segmentList == null || segmentList.isEmpty()) { - return; - } - int nSegment = segmentList.size(); - - Map cuboidHLLMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size()); - Map sizeMapMerged = Maps.newHashMapWithExpectedSize(cuboidSet.size()); - for (CubeSegment pSegment : segmentList) { - CubeStatsReader pReader = new CubeStatsReader(pSegment, pSegment.getConfig()); - Map pHLLMap = pReader.getCuboidRowEstimatesHLLOrigin(); - if (pHLLMap == null || pHLLMap.isEmpty()) { - logger.info("Cuboid Statistics for segment " + pSegment.getName() + " is not enabled."); - nSegment--; - continue; - } - Map pSizeMap = pReader.getCuboidSizeMap(); - for (Long pCuboid : cuboidSet) { - HLLCounter pInnerHLL = pHLLMap.get(pCuboid); - Preconditions.checkNotNull(pInnerHLL, "statistics should exist for cuboid " + pCuboid + " of segment " - + pSegment.getCubeDesc().getName() + "[" + pSegment.getName() + "]"); - if (cuboidHLLMapMerged.get(pCuboid) != null) { - cuboidHLLMapMerged.get(pCuboid).merge(pInnerHLL); - } else { - cuboidHLLMapMerged.put(pCuboid, pInnerHLL); - } - - Double pSize = sizeMapMerged.get(pCuboid); - sizeMapMerged.put(pCuboid, pSize == null ? pSizeMap.get(pCuboid) : pSizeMap.get(pCuboid) + pSize); - } - } - - if (nSegment < 1) { - return; - } - for (Long pCuboid : cuboidSet) { - statisticsMerged.put(pCuboid, cuboidHLLMapMerged.get(pCuboid).getCountEstimate() / nSegment); - sizeMerged.put(pCuboid, sizeMapMerged.get(pCuboid) / nSegment); - } - } - - public static Map readCuboidStatsFromSegment(Set cuboidIds, CubeSegment cubeSegment) - throws IOException { - if (cubeSegment == null) { - logger.warn("The cube segment can not be " + null); - return null; - } - - CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cubeSegment.getConfig()); - if (cubeStatsReader.getCuboidRowEstimatesHLL() == null - || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) { - logger.info("Cuboid Statistics is not enabled."); - return null; - } - - Map cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL(); - Map cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size()); - for (Long cuboid : cuboidIds) { - Long rowEstimate = cuboidsWithStatsAll.get(cuboid); - if (rowEstimate == null) { - logger.warn("Cannot get the row count stats for cuboid " + cuboid); - } else { - cuboidsWithStats.put(cuboid, rowEstimate); - } - } - return cuboidsWithStats; - } -}