kylin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From liy...@apache.org
Subject [1/2] incubator-kylin git commit: KYLIN-752 improve query performance for values that are not in dictionary
Date Thu, 11 Jun 2015 10:07:58 GMT
Repository: incubator-kylin
Updated Branches:
  refs/heads/0.7-staging f0f8521b8 -> aaec68018


KYLIN-752 improve query performance for values that are not in dictionary

A quick fix to drop the not-in-dictionary values before generating hbase
scan range.
Building on that, we can make a further optimization: as long as at
least one andFilter is empty (after we drop the not-in-dictionary
values), we can simply treat the whole andFilters as empty and avoid
an unnecessary hbase scan.
However, this strategy only works for 'EQ' and 'IN'. For 'LT', 'LTE',
'GT' and 'GTE', we have to replace the not-in-dictionary values with the
closest smaller or bigger dictionary values by calling "final public int
getIdFromValue(T value, int roundingFlag)" of the "Dictionary" class.

Signed-off-by: Li, Yang <yangli9@ebay.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/eaf66856
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/eaf66856
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/eaf66856

Branch: refs/heads/0.7-staging
Commit: eaf66856178feb79ebe55467808c476520da55c6
Parents: f0f8521
Author: superhua <huanghua@mininglamp.com>
Authored: Thu May 21 17:56:48 2015 +0800
Committer: Li, Yang <yangli9@ebay.com>
Committed: Thu Jun 11 17:55:50 2015 +0800

----------------------------------------------------------------------
 .../kylin/storage/hbase/CubeStorageEngine.java  |  20 ++-
 .../hbase/TupleFilterValueOptimizer.java        | 136 +++++++++++++++++++
 2 files changed, 152 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/eaf66856/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
index 5c6be3b..06e1cd2 100644
--- a/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/CubeStorageEngine.java
@@ -421,8 +421,10 @@ public class CubeStorageEngine implements IStorageEngine {
             }
 
             Collection<ColumnValueRange> andRanges = translateToAndDimRanges(andFilter.getChildren(),
cubeSegment);
-
-            result.add(andRanges);
+            // ignore the empty-AND
+            if (andRanges != null && !andRanges.isEmpty()) {
+                result.add(andRanges);
+            }
         }
 
         return preprocessConstantConditions(result);
@@ -458,6 +460,7 @@ public class CubeStorageEngine implements IStorageEngine {
 
     private Collection<ColumnValueRange> translateToAndDimRanges(List<? extends
TupleFilter> andFilters, CubeSegment cubeSegment) {
         Map<TblColRef, ColumnValueRange> rangeMap = new HashMap<TblColRef, ColumnValueRange>();
+        boolean isEmptyAnd = false;
         for (TupleFilter filter : andFilters) {
             if ((filter instanceof CompareTupleFilter) == false) {
                 continue;
@@ -468,11 +471,20 @@ public class CubeStorageEngine implements IStorageEngine {
                 continue;
             }
 
-            ColumnValueRange range = new ColumnValueRange(comp.getColumn(), comp.getValues(),
comp.getOperator());
+            // optimize the values of tuple filter
+            Collection<String> newValues = TupleFilterValueOptimizer.doOptimization(cubeSegment,
comp.getColumn(), comp);
+            // in case the current filter is an empty-AND, do not generate ColumnValueRange
+            // and also set isEmptyAnd to true so that an empty AND-list will be returned
+            if (TupleFilterValueOptimizer.isEmptyAnd(comp, newValues)) {
+                isEmptyAnd = true;
+                break;
+            }
+
+            ColumnValueRange range = new ColumnValueRange(comp.getColumn(), newValues, comp.getOperator());
             andMerge(range, rangeMap);
 
         }
-        return rangeMap.values();
+        return isEmptyAnd ? Collections.<ColumnValueRange> emptyList() : rangeMap.values();
     }
 
     private void andMerge(ColumnValueRange range, Map<TblColRef, ColumnValueRange>
rangeMap) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/eaf66856/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
b/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
new file mode 100644
index 0000000..49d3143
--- /dev/null
+++ b/storage/src/main/java/org/apache/kylin/storage/hbase/TupleFilterValueOptimizer.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.storage.hbase;
+
+import org.apache.kylin.cube.CubeSegment;
+import org.apache.kylin.cube.kv.RowKeyColumnIO;
+import org.apache.kylin.dict.Dictionary;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+/**
+ * @author Huang, Hua
+ */
+public class TupleFilterValueOptimizer {
+
+    private static boolean isInDictionary(Dictionary<String> dict, String value) {
+        boolean inFlag = true;
+        try {
+            int id = dict.getIdFromValue(value, 0);
+        } catch (IllegalArgumentException ex) {
+            inFlag = false;
+        }
+
+        return inFlag;
+    }
+
+    private static Collection<String> removeNonDictionaryValues(CubeSegment cubeSegment,
TblColRef column, Collection<String> values) {
+        RowKeyColumnIO rowKeyColumnIO = new RowKeyColumnIO(cubeSegment);
+
+        Dictionary<String> dict = rowKeyColumnIO.getDictionary(column);
+        // in case that dict is null, just return values
+        if (dict == null) return values;
+
+        Collection<String> newValues = new LinkedList<String>();
+        for (String value : values) {
+            if (isInDictionary(dict, value)) newValues.add(value);
+        }
+
+        return newValues;
+    }
+
+    private static Collection<String> roundDictionaryValues(CubeSegment cubeSegment,
TblColRef column, Collection<String> values, int roundingFlag) {
+        RowKeyColumnIO rowKeyColumnIO = new RowKeyColumnIO(cubeSegment);
+
+        Dictionary<String> dict = rowKeyColumnIO.getDictionary(column);
+        // in case that dict is null, just return values
+        if (dict == null) return values;
+
+        Collection<String> newValues = new LinkedList<String>();
+        for (String value : values) {
+            if (isInDictionary(dict, value)) {
+                newValues.add(value);
+            }
+            else {
+                try {
+                    int id = dict.getIdFromValue(value, roundingFlag);
+                    String newValue = dict.getValueFromId(id);
+                    newValues.add(newValue);
+                } catch (IllegalArgumentException ex) {
+                }
+            }
+        }
+
+        return newValues;
+    }
+
+    private static Collection<String> optimizeCompareTupleFilter(CubeSegment cubeSegment,
TblColRef column, CompareTupleFilter comp) {
+        Collection<String> newValues = comp.getValues();
+        switch (comp.getOperator()) {
+            case EQ:
+            case IN:
+                newValues = removeNonDictionaryValues(cubeSegment, column, comp.getValues());
+                break;
+            case LT:
+            case LTE:
+                newValues = roundDictionaryValues(cubeSegment, column, comp.getValues(),
-1);
+                break;
+            case GT:
+            case GTE:
+                newValues = roundDictionaryValues(cubeSegment, column, comp.getValues(),
1);
+                break;
+            default:
+                break;
+        }
+
+        return newValues;
+    }
+
+    public static Collection<String> doOptimization(CubeSegment cubeSegment, TblColRef
column, TupleFilter filter) {
+        if (filter instanceof CompareTupleFilter) {
+            return optimizeCompareTupleFilter(cubeSegment, column, (CompareTupleFilter)filter);
+        }
+
+        return filter.getValues();
+    }
+
+    public static boolean isEmptyAnd(TupleFilter filter, Collection<String> values)
{
+        boolean isEmptyAnd = false;
+        switch (filter.getOperator()) {
+            case EQ:
+            case IN:
+            case LT:
+            case LTE:
+            case GT:
+            case GTE:
+                if (values == null || values.isEmpty()) {
+                    isEmptyAnd = true;
+                }
+                break;
+            default:
+                break;
+        }
+
+        return isEmptyAnd;
+    }
+}


Mime
View raw message