Return-Path: X-Original-To: apmail-kylin-commits-archive@minotaur.apache.org Delivered-To: apmail-kylin-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7AB5D199D7 for ; Mon, 14 Mar 2016 08:44:09 +0000 (UTC) Received: (qmail 89222 invoked by uid 500); 14 Mar 2016 08:44:09 -0000 Delivered-To: apmail-kylin-commits-archive@kylin.apache.org Received: (qmail 89177 invoked by uid 500); 14 Mar 2016 08:44:08 -0000 Mailing-List: contact commits-help@kylin.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kylin.apache.org Delivered-To: mailing list commits@kylin.apache.org Received: (qmail 89088 invoked by uid 99); 14 Mar 2016 08:44:08 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 14 Mar 2016 08:44:08 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 51A0DDFA40; Mon, 14 Mar 2016 08:44:08 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: liyang@apache.org To: commits@kylin.apache.org Date: Mon, 14 Mar 2016 08:44:10 -0000 Message-Id: In-Reply-To: <9243124765d74ba3a6623e6c5350bdc4@git.apache.org> References: <9243124765d74ba3a6623e6c5350bdc4@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [3/3] kylin git commit: KYLIN-1492 Custom dimension encoding support KYLIN-1492 Custom dimension encoding support Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/75027ed6 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/75027ed6 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/75027ed6 Branch: refs/heads/master Commit: 75027ed649c4dfba02e80004495875a31dc73702 Parents: 959c6dd Author: Li, Yang Authored: Fri Feb 26 14:43:59 2016 +0800 Committer: Li Yang Committed: Mon Mar 14 16:42:33 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/util/Dictionary.java | 232 ------------------- .../java/org/apache/kylin/cube/CubeManager.java | 2 +- .../java/org/apache/kylin/cube/CubeSegment.java | 21 +- .../kylin/cube/common/RowKeySplitter.java | 3 +- .../kylin/cube/gridtable/CubeCodeSystem.java | 141 ++--------- .../kylin/cube/gridtable/CubeGridTable.java | 42 +--- .../gridtable/CuboidToGridTableMapping.java | 11 + .../kylin/cube/gridtable/FixLenSerializer.java | 110 --------- .../cube/gridtable/TrimmedCubeCodeSystem.java | 148 +++++++----- .../inmemcubing/AbstractInMemCubeBuilder.java | 2 +- .../cube/inmemcubing/DoggedCubeBuilder.java | 2 +- .../cube/inmemcubing/InMemCubeBuilder.java | 2 +- .../InMemCubeBuilderInputConverter.java | 3 +- .../kylin/cube/kv/AbstractRowKeyEncoder.java | 4 +- .../org/apache/kylin/cube/kv/CubeDimEncMap.java | 80 +++++++ .../org/apache/kylin/cube/kv/RowConstants.java | 2 - .../apache/kylin/cube/kv/RowKeyColumnIO.java | 149 ++---------- .../org/apache/kylin/cube/kv/RowKeyDecoder.java | 4 +- .../org/apache/kylin/cube/kv/RowKeyEncoder.java | 4 +- .../org/apache/kylin/cube/model/RowKeyDesc.java | 12 +- .../org/apache/kylin/cube/util/CubingUtils.java | 2 +- .../kylin/gridtable/DefaultGTComparator.java | 10 +- .../kylin/cube/DictionaryManagerTest.java | 2 +- .../DoggedCubeBuilderStressTest.java | 2 +- .../cube/inmemcubing/DoggedCubeBuilderTest.java | 2 +- .../cube/inmemcubing/InMemCubeBuilderTest.java | 2 +- .../kylin/gridtable/DictGridTableTest.java | 18 +- .../apache/kylin/dict/DateStrDictionary.java | 2 +- .../org/apache/kylin/dict/DictCodeSystem.java | 4 +- .../apache/kylin/dict/DictionaryGenerator.java | 2 +- .../org/apache/kylin/dict/DictionaryInfo.java | 2 +- .../kylin/dict/DictionaryInfoSerializer.java | 2 +- .../apache/kylin/dict/DictionaryManager.java | 2 +- .../apache/kylin/dict/DictionarySerializer.java | 2 +- .../org/apache/kylin/dict/IDictionaryAware.java | 34 --- .../dict/MultipleDictionaryValueEnumerator.java | 2 +- .../apache/kylin/dict/TimeStrDictionary.java | 2 +- .../org/apache/kylin/dict/TrieDictionary.java | 2 +- .../dict/TupleFilterFunctionTransformer.java | 16 +- .../apache/kylin/dict/lookup/SnapshotTable.java | 2 +- .../apache/kylin/dict/NumberDictionaryTest.java | 2 +- .../kylin/dict/TimeStrDictionaryTests.java | 4 +- .../org/apache/kylin/dimension/Dictionary.java | 231 ++++++++++++++++++ .../kylin/dimension/DictionaryDimEnc.java | 137 +++++++++++ .../kylin/dimension/DimensionEncoding.java | 62 +++++ .../apache/kylin/dimension/FixedLenDimEnc.java | 136 +++++++++++ .../kylin/dimension/IDimensionEncodingMap.java | 40 ++++ .../apache/kylin/measure/MeasureIngester.java | 2 +- .../org/apache/kylin/measure/MeasureType.java | 2 +- .../kylin/measure/basic/BigDecimalIngester.java | 2 +- .../kylin/measure/basic/DoubleIngester.java | 2 +- .../kylin/measure/basic/LongIngester.java | 2 +- .../kylin/measure/bitmap/BitmapMeasureType.java | 2 +- .../ExtendedColumnMeasureType.java | 2 +- .../kylin/measure/hllc/HLLCMeasureType.java | 2 +- .../kylin/measure/topn/TopNMeasureType.java | 2 +- .../storage/translate/ColumnValueRange.java | 2 +- .../storage/translate/ColumnValueRangeTest.java | 2 +- .../kylin/engine/mr/common/CubeStatsReader.java | 4 +- .../engine/mr/steps/BaseCuboidMapperBase.java | 2 +- .../engine/mr/steps/InMemCuboidMapper.java | 2 +- .../engine/mr/steps/MergeCuboidMapper.java | 2 +- .../engine/mr/steps/MergeCuboidMapperTest.java | 2 +- .../apache/kylin/engine/spark/SparkCubing.java | 6 +- .../streaming/OneOffStreamingBuilder.java | 2 +- .../engine/streaming/StreamingBatchBuilder.java | 2 +- .../streaming/cube/StreamingCubeBuilder.java | 2 +- .../apache/kylin/invertedindex/IISegment.java | 7 +- .../index/CompressedValueContainer.java | 2 +- .../invertedindex/index/RawTableRecord.java | 4 +- .../apache/kylin/invertedindex/index/Slice.java | 2 +- .../kylin/invertedindex/index/SliceBuilder.java | 2 +- .../kylin/invertedindex/index/TableRecord.java | 2 +- .../invertedindex/index/TableRecordInfo.java | 2 +- .../invertedindex/model/IIKeyValueCodec.java | 2 +- .../invertedindex/util/IIDictionaryBuilder.java | 2 +- .../kylin/invertedindex/IIInstanceTest.java | 2 +- .../invertedindex/InvertedIndexLocalTest.java | 2 +- .../hbase/common/coprocessor/AggrKey.java | 4 +- .../common/coprocessor/CoprocessorFilter.java | 6 +- .../common/coprocessor/CoprocessorRowType.java | 2 +- .../common/coprocessor/FilterDecorator.java | 18 +- .../storage/hbase/cube/v1/CubeStorageQuery.java | 2 +- .../hbase/cube/v1/CubeTupleConverter.java | 2 +- .../coprocessor/observer/ObserverEnabler.java | 2 +- .../v1/coprocessor/observer/ObserverTuple.java | 2 +- .../hbase/cube/v2/CubeSegmentScanner.java | 4 +- .../hbase/cube/v2/CubeTupleConverter.java | 2 +- .../endpoint/BitMapFilterEvaluator.java | 2 +- .../endpoint/ClearTextDictionary.java | 31 +-- .../ii/coprocessor/endpoint/IIEndpoint.java | 21 +- .../coprocessor/endpoint/LocalDictionary.java | 31 ++- .../endpoint/BitMapFilterEvaluatorTest.java | 2 +- 93 files changed, 1026 insertions(+), 878 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java b/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java deleted file mode 100644 index 6d3fa62..0000000 --- a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.common.util; - -import java.io.PrintStream; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; - -import org.apache.kylin.common.persistence.Writable; - -/** - * A bi-way dictionary that maps from dimension/column values to IDs and vice - * versa. By storing IDs instead of real values, the size of cube is - * significantly reduced. - * - * - IDs are smallest integers possible for the cardinality of a column, for the - * purpose of minimal storage space - IDs preserve ordering of values, such that - * range query can be applied to IDs directly - * - * A dictionary once built, is immutable. This allows optimal memory footprint - * by e.g. flatten the Trie structure into a byte array, replacing node pointers - * with array offsets. - * - * @author yangli9 - */ -@SuppressWarnings("serial") -abstract public class Dictionary implements Writable, Serializable { - - public static final byte NULL = (byte) 0xff; - - // ID with all bit-1 (0xff e.g.) reserved for NULL value - public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffffff }; - - abstract public int getMinId(); - - abstract public int getMaxId(); - - public int getSize() { - return getMaxId() - getMinId() + 1; - } - - /** - * @return the size of an ID in bytes, determined by the cardinality of column - */ - abstract public int getSizeOfId(); - - /** - * @return the (maximum) size of value in bytes, determined by the longest value - */ - abstract public int getSizeOfValue(); - - /** - * @return true if each entry of this dict is contained by the dict in param - */ - abstract public boolean contains(Dictionary another); - - /** - * Convenient form of getIdFromValue(value, 0) - */ - final public int getIdFromValue(T value) throws IllegalArgumentException { - return getIdFromValue(value, 0); - } - - /** - * Returns the ID integer of given value. In case of not found - *

- * - if roundingFlag=0, throw IllegalArgumentException;
- * - if roundingFlag<0, the closest smaller ID integer if exist;
- * - if roundingFlag>0, the closest bigger ID integer if exist.
- *

- * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes() - * - * @throws IllegalArgumentException - * if value is not found in dictionary and rounding is off; - * or if rounding cannot find a smaller or bigger ID - */ - final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException { - if (isNullObjectForm(value)) - return nullId(); - else - return getIdFromValueImpl(value, roundingFlag); - } - - final public boolean containsValue(T value) throws IllegalArgumentException { - if (isNullObjectForm(value)) { - return true; - } else { - try { - //if no key found, it will throw exception - getIdFromValueImpl(value, 0); - } catch (IllegalArgumentException e) { - return false; - } - return true; - } - } - - protected boolean isNullObjectForm(T value) { - return value == null; - } - - abstract protected int getIdFromValueImpl(T value, int roundingFlag); - - /** - * @return the value corresponds to the given ID - * @throws IllegalArgumentException - * if ID is not found in dictionary - */ - final public T getValueFromId(int id) throws IllegalArgumentException { - if (isNullId(id)) - return null; - else - return getValueFromIdImpl(id); - } - - abstract protected T getValueFromIdImpl(int id); - - /** - * Convenient form of - * getIdFromValueBytes(value, offset, len, 0) - */ - final public int getIdFromValueBytes(byte[] value, int offset, int len) throws IllegalArgumentException { - return getIdFromValueBytes(value, offset, len, 0); - } - - /** - * A lower level API, return ID integer from raw value bytes. In case of not found - *

- * - if roundingFlag=0, throw IllegalArgumentException;
- * - if roundingFlag<0, the closest smaller ID integer if exist;
- * - if roundingFlag>0, the closest bigger ID integer if exist.
- *

- * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value). - * - * @throws IllegalArgumentException - * if value is not found in dictionary and rounding is off; - * or if rounding cannot find a smaller or bigger ID - */ - final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException { - if (isNullByteForm(value, offset, len)) - return nullId(); - else { - int id = getIdFromValueBytesImpl(value, offset, len, roundingFlag); - if (id < 0) - throw new IllegalArgumentException("Value not exists!"); - return id; - } - } - - protected boolean isNullByteForm(byte[] value, int offset, int len) { - return value == null; - } - - abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag); - - final public byte[] getValueBytesFromId(int id) { - if (isNullId(id)) - return BytesUtil.EMPTY_BYTE_ARRAY; - else - return getValueBytesFromIdImpl(id); - } - - abstract protected byte[] getValueBytesFromIdImpl(int id); - - /** - * A lower level API, get byte values from ID, return the number of bytes - * written. Bypassing the cache layer, this could be significantly slower - * than getIdFromValue(T value). - * - * @return size of value bytes, 0 if empty string, -1 if null - * - * @throws IllegalArgumentException - * if ID is not found in dictionary - */ - final public int getValueBytesFromId(int id, byte[] returnValue, int offset) throws IllegalArgumentException { - if (isNullId(id)) - return -1; - else - return getValueBytesFromIdImpl(id, returnValue, offset); - } - - abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset); - - abstract public void dump(PrintStream out); - - public int nullId() { - return NULL_ID[getSizeOfId()]; - } - - public boolean isNullId(int id) { - int nullId = NULL_ID[getSizeOfId()]; - return (nullId & id) == nullId; - } - - /** utility that converts a dictionary ID to string, preserving order */ - public static String dictIdToString(byte[] idBytes, int offset, int length) { - try { - return new String(idBytes, offset, length, "ISO-8859-1"); - } catch (UnsupportedEncodingException e) { - // never happen - return null; - } - } - - /** the reverse of dictIdToString(), returns integer ID */ - public static int stringToDictId(String str) { - try { - byte[] bytes = str.getBytes("ISO-8859-1"); - return BytesUtil.readUnsigned(bytes, 0, bytes.length); - } catch (UnsupportedEncodingException e) { - // never happen - return 0; - } - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java index 4951ce6..f53a1f0 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java @@ -32,7 +32,6 @@ import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.Serializer; import org.apache.kylin.common.restclient.Broadcaster; import org.apache.kylin.common.restclient.CaseInsensitiveStringCache; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.DimensionDesc; @@ -42,6 +41,7 @@ import org.apache.kylin.dict.DistinctColumnValuesProvider; import org.apache.kylin.dict.lookup.LookupStringTable; import org.apache.kylin.dict.lookup.SnapshotManager; import org.apache.kylin.dict.lookup.SnapshotTable; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.SegmentStatusEnum; import org.apache.kylin.metadata.model.TableDesc; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java index 5b61c10..ead6a41 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java @@ -25,12 +25,12 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ShardingHash; +import org.apache.kylin.cube.kv.CubeDimEncMap; import org.apache.kylin.cube.kv.RowConstants; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; -import org.apache.kylin.dict.IDictionaryAware; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.IJoinedFlatTableDesc; import org.apache.kylin.metadata.model.SegmentStatusEnum; import org.apache.kylin.metadata.model.TblColRef; @@ -45,7 +45,7 @@ import java.util.TimeZone; import java.util.concurrent.ConcurrentHashMap; @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) -public class CubeSegment implements Comparable, IDictionaryAware, IRealizationSegment { +public class CubeSegment implements Comparable, IRealizationSegment { @JsonBackReference private CubeInstance cubeInstance; @@ -277,20 +277,13 @@ public class CubeSegment implements Comparable, IDictionaryAware, I return result; } - @Override - public int getColumnLength(TblColRef col) { - Dictionary dict = getDictionary(col); - if (dict == null) { - return this.getCubeDesc().getRowkey().getColumnLength(col); - } else { - return dict.getSizeOfId(); - } - } - - @Override public Dictionary getDictionary(TblColRef col) { return CubeManager.getInstance(this.getCubeInstance().getConfig()).getDictionary(this, col); } + + public CubeDimEncMap getDimensionEncodingMap() { + return new CubeDimEncMap(this); + } public void validate() { if (cubeInstance.getDescriptor().getModel().getPartitionDesc().isPartitioned() && dateRangeStart >= dateRangeEnd) http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/common/RowKeySplitter.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/RowKeySplitter.java b/core-cube/src/main/java/org/apache/kylin/cube/common/RowKeySplitter.java index 56247bc..a8be987 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/common/RowKeySplitter.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/common/RowKeySplitter.java @@ -22,6 +22,7 @@ import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.SplittedBytes; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; +import org.apache.kylin.cube.kv.CubeDimEncMap; import org.apache.kylin.cube.kv.RowConstants; import org.apache.kylin.cube.kv.RowKeyColumnIO; import org.apache.kylin.cube.model.CubeDesc; @@ -58,7 +59,7 @@ public class RowKeySplitter { public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) { this.enableSharding = cubeSeg.isEnableSharding(); this.cubeDesc = cubeSeg.getCubeDesc(); - this.colIO = new RowKeyColumnIO(cubeSeg); + this.colIO = new RowKeyColumnIO(new CubeDimEncMap(cubeSeg)); this.splitBuffers = new SplittedBytes[splitLen]; for (int i = 0; i < splitLen; i++) { http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java index f15d3f5..e0dc4dd 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java @@ -22,9 +22,10 @@ import java.nio.ByteBuffer; import java.util.Collections; import java.util.Map; -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DictionaryDimEnc.DictionarySerializer; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.DefaultGTComparator; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.IGTCodeSystem; @@ -32,8 +33,6 @@ import org.apache.kylin.gridtable.IGTComparator; import org.apache.kylin.measure.MeasureAggregator; import org.apache.kylin.metadata.datatype.DataTypeSerializer; -import com.google.common.collect.Maps; - /** * defines how column values will be encoded to/ decoded from GTRecord * @@ -43,63 +42,44 @@ import com.google.common.collect.Maps; @SuppressWarnings({ "rawtypes", "unchecked" }) public class CubeCodeSystem implements IGTCodeSystem { - // ============================================================================ - private GTInfo info; - private Map dictionaryMap; // column index ==> dictionary of column - private Map fixLenMap; // column index ==> fixed length of column - private Map dependentMetricsMap; + private DimensionEncoding[] dimEncs; private DataTypeSerializer[] serializers; private IGTComparator comparator; + private Map dependentMetricsMap; - public CubeCodeSystem(Map dictionaryMap) { - this(dictionaryMap, Collections. emptyMap(), Collections. emptyMap()); + public CubeCodeSystem(DimensionEncoding[] dimEncs) { + this(dimEncs, Collections. emptyMap()); } - public CubeCodeSystem(Map dictionaryMap, Map fixLenMap, Map dependentMetricsMap) { - this.dictionaryMap = dictionaryMap; - this.fixLenMap = fixLenMap; + public CubeCodeSystem(DimensionEncoding[] dimEncs, Map dependentMetricsMap) { + this.dimEncs = dimEncs; + this.comparator = new DefaultGTComparator(); this.dependentMetricsMap = dependentMetricsMap; } public TrimmedCubeCodeSystem trimForCoprocessor() { - Map dictSizes = Maps.newHashMap(); - Map fixedLengthSizes = Maps.newHashMap(); - - for (int i = 0; i < serializers.length; i++) { - if (serializers[i] instanceof DictionarySerializer) { - dictSizes.put(i,serializers[i].maxLength()); - } else if(serializers[i] instanceof FixLenSerializer) { - fixedLengthSizes.put(i,serializers[i].maxLength()); - } - } - - return new TrimmedCubeCodeSystem(dependentMetricsMap,dictSizes,fixedLengthSizes); + return new TrimmedCubeCodeSystem(dimEncs, dependentMetricsMap); } @Override public void init(GTInfo info) { this.info = info; - serializers = new DataTypeSerializer[info.getColumnCount()]; - for (int i = 0; i < info.getColumnCount(); i++) { - // dimension with dictionary - if (dictionaryMap.get(i) != null) { - serializers[i] = new DictionarySerializer(dictionaryMap.get(i)); - } - // dimension of fixed length - else if (fixLenMap.get(i) != null) { - serializers[i] = new FixLenSerializer(fixLenMap.get(i)); + this.serializers = new DataTypeSerializer[info.getColumnCount()]; + for (int i = 0; i < serializers.length; i++) { + DimensionEncoding dimEnc = i < dimEncs.length ? dimEncs[i] : null; + + // for dimensions + if (dimEnc != null) { + serializers[i] = dimEnc.asDataTypeSerializer(); } - // metrics + // for measures else { serializers[i] = DataTypeSerializer.create(info.getColumnType(i)); } } - - //when changing this, also take care of TrimmedCubeCodeSystem.init - this.comparator = new DefaultGTComparator(); } @Override @@ -126,10 +106,14 @@ public class CubeCodeSystem implements IGTCodeSystem { public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) { DataTypeSerializer serializer = serializers[col]; if (serializer instanceof DictionarySerializer) { - ((DictionarySerializer) serializer).serializeWithRounding(value, roundingFlag, buf); + DictionaryDimEnc dictEnc = ((DictionaryDimEnc) dimEncs[col]); + if (dictEnc.getRoundingFlag() != roundingFlag) { + serializer = dictEnc.copy(roundingFlag).asDataTypeSerializer(); + } + serializer.serialize(value, buf); } else { if (value instanceof String) { - // for dimensions mostly, measures are converted by MeasureIngestor before reaching this point + // for dimensions; measures are converted by MeasureIngestor before reaching this point value = serializer.valueOf((String) value); } serializer.serialize(value, buf); @@ -169,79 +153,4 @@ public class CubeCodeSystem implements IGTCodeSystem { return result; } - static class TrimmedDictionarySerializer extends DataTypeSerializer { - - final int fieldSize; - - public TrimmedDictionarySerializer(int fieldSize) { - this.fieldSize = fieldSize; - } - - @Override - public int peekLength(ByteBuffer in) { - return fieldSize; - } - - @Override - public int maxLength() { - return fieldSize; - } - - @Override - public int getStorageBytesEstimate() { - return fieldSize; - } - - @Override - public void serialize(Object value, ByteBuffer out) { - throw new UnsupportedOperationException(); - } - - @Override - public Object deserialize(ByteBuffer in) { - throw new UnsupportedOperationException(); - } - } - - static class DictionarySerializer extends DataTypeSerializer { - private Dictionary dictionary; - - DictionarySerializer(Dictionary dictionary) { - this.dictionary = dictionary; - } - - public void serializeWithRounding(Object value, int roundingFlag, ByteBuffer buf) { - int id = dictionary.getIdFromValue(value, roundingFlag); - BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf); - } - - @Override - public void serialize(Object value, ByteBuffer buf) { - int id = dictionary.getIdFromValue(value); - BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf); - } - - @Override - public Object deserialize(ByteBuffer in) { - int id = BytesUtil.readUnsigned(in, dictionary.getSizeOfId()); - return dictionary.getValueFromId(id); - } - - @Override - public int peekLength(ByteBuffer in) { - return dictionary.getSizeOfId(); - } - - @Override - public int maxLength() { - return dictionary.getSizeOfId(); - } - - @Override - public int getStorageBytesEstimate() { - return dictionary.getSizeOfId(); - } - - } - } http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java index 47885f7..62ba6eb 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java @@ -21,17 +21,18 @@ package org.apache.kylin.cube.gridtable; import java.util.List; import java.util.Map; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; +import org.apache.kylin.cube.kv.CubeDimEncMap; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; +import org.apache.kylin.dimension.IDimensionEncodingMap; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.metadata.model.TblColRef; import com.google.common.collect.Maps; -@SuppressWarnings("rawtypes") public class CubeGridTable { public static Map> getDimensionToDictionaryMap(CubeSegment cubeSeg, long cuboidId) { @@ -52,44 +53,21 @@ public class CubeGridTable { } public static GTInfo newGTInfo(CubeSegment cubeSeg, long cuboidId) { - Map> dictionaryMap = getDimensionToDictionaryMap(cubeSeg, cuboidId); Cuboid cuboid = Cuboid.findById(cubeSeg.getCubeDesc(), cuboidId); - for (TblColRef dim : cuboid.getColumns()) { - if (cubeSeg.getCubeDesc().getRowkey().isUseDictionary(dim)) { - Dictionary dict = dictionaryMap.get(dim); - if (dict == null) { - throw new RuntimeException("Dictionary for " + dim + " is not found"); - } - } - } - - return newGTInfo(cubeSeg.getCubeDesc(), cuboidId, dictionaryMap); + return newGTInfo(cuboid, new CubeDimEncMap(cubeSeg)); } public static GTInfo newGTInfo(CubeDesc cubeDesc, long cuboidId, Map> dictionaryMap) { Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidId); + return newGTInfo(cuboid, new CubeDimEncMap(cubeDesc, dictionaryMap)); + } + + public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap) { CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid); - Map dictionaryByColIdx = Maps.newHashMap(); - Map fixLenByColIdx = Maps.newHashMap(); - - for (TblColRef dim : cuboid.getColumns()) { - int colIndex = mapping.getIndexOf(dim); - if (cubeDesc.getRowkey().isUseDictionary(dim)) { - Dictionary dict = dictionaryMap.get(dim); - dictionaryByColIdx.put(colIndex, dict); - } else { - int len = cubeDesc.getRowkey().getColumnLength(dim); - if (len == 0) - throw new IllegalStateException(); - - fixLenByColIdx.put(colIndex, len); - } - } - GTInfo.Builder builder = GTInfo.builder(); - builder.setTableName("Cuboid " + cuboidId); - builder.setCodeSystem(new CubeCodeSystem(dictionaryByColIdx, fixLenByColIdx, mapping.getDependentMetricsMap())); + builder.setTableName("Cuboid " + cuboid.getId()); + builder.setCodeSystem(new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap())); builder.setColumns(mapping.getDataTypes()); builder.setPrimaryKey(mapping.getPrimaryKey()); builder.enableColumnBlock(mapping.getColumnBlocks()); http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java index aad37fd..75ab140 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CuboidToGridTableMapping.java @@ -28,6 +28,8 @@ import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.model.HBaseColumnDesc; import org.apache.kylin.cube.model.HBaseColumnFamilyDesc; +import org.apache.kylin.dimension.DimensionEncoding; +import org.apache.kylin.dimension.IDimensionEncodingMap; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; @@ -152,6 +154,15 @@ public class CuboidToGridTableMapping { return cuboid.getColumns(); } + public DimensionEncoding[] getDimensionEncodings(IDimensionEncodingMap dimEncMap) { + List dims = cuboid.getColumns(); + DimensionEncoding[] dimEncs = new DimensionEncoding[dims.size()]; + for (int i = 0; i < dimEncs.length; i++) { + dimEncs[i] = dimEncMap.get(dims.get(i)); + } + return dimEncs; + } + public Map getDependentMetricsMap() { Map result = Maps.newHashMap(); List measures = cuboid.getCubeDesc().getMeasures(); http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java deleted file mode 100644 index 24c4a19..0000000 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.cube.gridtable; - -import java.nio.ByteBuffer; -import java.util.Arrays; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.cube.kv.RowConstants; -import org.apache.kylin.metadata.datatype.DataTypeSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class FixLenSerializer extends DataTypeSerializer { - - private static Logger logger = LoggerFactory.getLogger(FixLenSerializer.class); - - // be thread-safe and avoid repeated obj creation - private ThreadLocal current = new ThreadLocal(); - - private int fixLen; - transient int avoidVerbose = 0; - - FixLenSerializer(int fixLen) { - this.fixLen = fixLen; - } - - private byte[] currentBuf() { - byte[] buf = current.get(); - if (buf == null) { - buf = new byte[fixLen]; - current.set(buf); - } - return buf; - } - - @Override - public void serialize(Object value, ByteBuffer out) { - byte[] buf = currentBuf(); - if (value == null) { - Arrays.fill(buf, Dictionary.NULL); - out.put(buf); - } else { - byte[] bytes = Bytes.toBytes(value.toString()); - if (bytes.length > fixLen) { - if (avoidVerbose++ % 10000 == 0) { - logger.warn("Expect at most " + fixLen + " bytes, but got " + bytes.length + ", will truncate, value string: " + value.toString() + " times:" + avoidVerbose); - } - } - out.put(bytes, 0, Math.min(bytes.length, fixLen)); - for (int i = bytes.length; i < fixLen; i++) { - out.put(RowConstants.ROWKEY_PLACE_HOLDER_BYTE); - } - } - } - - @Override - public Object deserialize(ByteBuffer in) { - byte[] buf = currentBuf(); - in.get(buf); - - int tail = fixLen; - while (tail > 0 && (buf[tail - 1] == RowConstants.ROWKEY_PLACE_HOLDER_BYTE || buf[tail - 1] == Dictionary.NULL)) { - tail--; - } - - if (tail == 0) { - return buf[0] == Dictionary.NULL ? null : ""; - } - - return Bytes.toString(buf, 0, tail); - } - - @Override - public int peekLength(ByteBuffer in) { - return fixLen; - } - - @Override - public int maxLength() { - return fixLen; - } - - @Override - public int getStorageBytesEstimate() { - return fixLen; - } - - @Override - public Object valueOf(String str) { - return str; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java index 6048ba0..e5169d2 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.kylin.common.util.BytesSerializer; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.common.util.ImmutableBitSet; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.DefaultGTComparator; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.IGTCodeSystem; @@ -35,44 +36,44 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer; import com.google.common.collect.Maps; +/** + * A limited code system where dimension value ser/des is disabled. + * Used inside coprocessor only. Because dictionary is not available. + */ @SuppressWarnings({ "rawtypes", "unchecked" }) public class TrimmedCubeCodeSystem implements IGTCodeSystem { - private Map dependentMetricsMap; - private Map dictSizes; - private Map fixedLengthSize; + private GTInfo info; - private transient GTInfo info; - private transient DataTypeSerializer[] serializers; - private transient IGTComparator comparator; + private DimensionEncoding[] dimEncs; + private DataTypeSerializer[] serializers; + private IGTComparator comparator; + private Map dependentMetricsMap; - public TrimmedCubeCodeSystem(Map dependentMetricsMap, Map dictSizes, Map fixedLengthSize) { + public TrimmedCubeCodeSystem(DimensionEncoding[] dimEncs, Map dependentMetricsMap) { + this.dimEncs = dimEncs; + this.comparator = new DefaultGTComparator(); this.dependentMetricsMap = dependentMetricsMap; - this.dictSizes = dictSizes; - this.fixedLengthSize = fixedLengthSize; } @Override public void init(GTInfo info) { this.info = info; - serializers = new DataTypeSerializer[info.getColumnCount()]; - for (int i = 0; i < info.getColumnCount(); i++) { - // dimension with dictionary - if (dictSizes.get(i) != null) { - serializers[i] = new CubeCodeSystem.TrimmedDictionarySerializer(dictSizes.get(i)); - } - // dimension of fixed length - else if (fixedLengthSize.get(i) != null) { - serializers[i] = new FixLenSerializer(fixedLengthSize.get(i)); + this.serializers = new DataTypeSerializer[info.getColumnCount()]; + for (int i = 0; i < serializers.length; i++) { + DimensionEncoding dimEnc = i < dimEncs.length ? dimEncs[i] : null; + + // for dimensions + if (dimEnc != null) { + // use trimmed serializer cause no dictionary in coprocessor + serializers[i] = new TrimmedDimensionSerializer(dimEnc.getLengthOfEncoding()); } - // metrics + // for measures else { serializers[i] = DataTypeSerializer.create(info.getColumnType(i)); } } - - this.comparator = new DefaultGTComparator(); } @Override @@ -98,11 +99,6 @@ public class TrimmedCubeCodeSystem implements IGTCodeSystem { @Override public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) { DataTypeSerializer serializer = serializers[col]; - - // if (((value instanceof String) && !(serializer instanceof StringSerializer || serializer instanceof CubeCodeSystem.FixLenSerializer))) { - // value = serializer.valueOf((String) value); - // } - serializer.serialize(value, buf); } @@ -149,49 +145,95 @@ public class TrimmedCubeCodeSystem implements IGTCodeSystem { BytesUtil.writeVInt(x.getValue(), out); } - BytesUtil.writeVInt(value.dictSizes.size(), out); - for (Map.Entry x : value.dictSizes.entrySet()) { - BytesUtil.writeVInt(x.getKey(), out); - BytesUtil.writeVInt(x.getValue(), out); - } - - BytesUtil.writeVInt(value.fixedLengthSize.size(), out); - for (Map.Entry x : value.fixedLengthSize.entrySet()) { - BytesUtil.writeVInt(x.getKey(), out); - BytesUtil.writeVInt(x.getValue(), out); + BytesUtil.writeVInt(value.dimEncs.length, out); + for (int i = 0; i < value.dimEncs.length; i++) { + DimensionEncoding enc = value.dimEncs[i]; + BytesUtil.writeVInt(enc == null ? 0 : enc.getLengthOfEncoding(), out); } } @Override public TrimmedCubeCodeSystem deserialize(ByteBuffer in) { Map dependentMetricsMap = Maps.newHashMap(); - Map dictSizes = Maps.newHashMap(); - Map fixedLengthSize = Maps.newHashMap(); - - int size = 0; - size = BytesUtil.readVInt(in); + int size = BytesUtil.readVInt(in); for (int i = 0; i < size; ++i) { int key = BytesUtil.readVInt(in); int value = BytesUtil.readVInt(in); dependentMetricsMap.put(key, value); } - size = BytesUtil.readVInt(in); - for (int i = 0; i < size; ++i) { - int key = BytesUtil.readVInt(in); - int value = BytesUtil.readVInt(in); - dictSizes.put(key, value); + DimensionEncoding[] dimEncs = new DimensionEncoding[BytesUtil.readVInt(in)]; + for (int i = 0; i < dimEncs.length; i++) { + int fixedLen = BytesUtil.readVInt(in); + if (fixedLen > 0) + dimEncs[i] = new TrimmedDimEnc(fixedLen); } - size = BytesUtil.readVInt(in); - for (int i = 0; i < size; ++i) { - int key = BytesUtil.readVInt(in); - int value = BytesUtil.readVInt(in); - fixedLengthSize.put(key, value); - } - return new TrimmedCubeCodeSystem(dependentMetricsMap, dictSizes, fixedLengthSize); + return new TrimmedCubeCodeSystem(dimEncs, dependentMetricsMap); } }; + static class TrimmedDimEnc extends DimensionEncoding { + final int fixedLen; + + TrimmedDimEnc(int fixedLen) { + this.fixedLen = fixedLen; + } + + @Override + public int getLengthOfEncoding() { + return fixedLen; + } + + @Override + public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + throw new UnsupportedOperationException(); + } + + @Override + public String decode(byte[] bytes, int offset, int len) { + throw new UnsupportedOperationException(); + } + + @Override + public DataTypeSerializer asDataTypeSerializer() { + throw new UnsupportedOperationException(); + } + } + + static class TrimmedDimensionSerializer extends DataTypeSerializer { + + final int fixedLen; + + public TrimmedDimensionSerializer(int fixedLen) { + this.fixedLen = fixedLen; + } + + @Override + public int peekLength(ByteBuffer in) { + return fixedLen; + } + + @Override + public int maxLength() { + return fixedLen; + } + + @Override + public int getStorageBytesEstimate() { + return fixedLen; + } + + @Override + public void serialize(Object value, ByteBuffer out) { + throw new UnsupportedOperationException(); + } + + @Override + public Object deserialize(ByteBuffer in) { + throw new UnsupportedOperationException(); + } + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java index 335a769..c567c9e 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java @@ -21,8 +21,8 @@ import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.gridtable.GTScanRequest; import org.apache.kylin.gridtable.GridTable; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java index af6ef82..c210bf9 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java @@ -30,10 +30,10 @@ import java.util.concurrent.ConcurrentNavigableMap; import java.util.concurrent.TimeUnit; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.MemoryBudgetController; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.gridtable.GTScanRequest; import org.apache.kylin.gridtable.IGTScanner; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java index c270d3f..ee5a757 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java @@ -30,7 +30,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.kylin.measure.topn.Counter; import org.apache.kylin.measure.topn.TopNCounter; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.MemoryBudgetController; import org.apache.kylin.common.util.MemoryBudgetController.MemoryWaterLevel; @@ -39,6 +38,7 @@ import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.cuboid.CuboidScheduler; import org.apache.kylin.cube.gridtable.CubeGridTable; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTAggregateScanner; import org.apache.kylin.gridtable.GTBuilder; import org.apache.kylin.gridtable.GTInfo; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java index a1eb6da..4c62bc6 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java @@ -22,10 +22,11 @@ import org.apache.kylin.common.util.Bytes; import java.util.List; import java.util.Map; -import org.apache.kylin.common.util.Dictionary; import com.google.common.collect.Lists; + import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.measure.MeasureIngester; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java index 62432f7..37b33aa 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java @@ -21,10 +21,10 @@ package org.apache.kylin.cube.kv; import java.util.Map; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; @@ -38,7 +38,7 @@ import org.slf4j.LoggerFactory; public abstract class AbstractRowKeyEncoder { protected static final Logger logger = LoggerFactory.getLogger(AbstractRowKeyEncoder.class); - public static final byte DEFAULT_BLANK_BYTE = Dictionary.NULL; + public static final byte DEFAULT_BLANK_BYTE = DimensionEncoding.NULL; protected byte blankByte = DEFAULT_BLANK_BYTE; protected final CubeSegment cubeSeg; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java new file mode 100644 index 0000000..e3c81f1 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.kv; + +import java.util.Map; + +import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.cube.model.RowKeyColDesc; +import org.apache.kylin.dimension.Dictionary; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DimensionEncoding; +import org.apache.kylin.dimension.FixedLenDimEnc; +import org.apache.kylin.dimension.IDimensionEncodingMap; +import org.apache.kylin.metadata.model.TblColRef; + +import com.google.common.collect.Maps; + +public class CubeDimEncMap implements IDimensionEncodingMap { + + final private CubeDesc cubeDesc; + final private CubeSegment seg; + final private Map> dictionaryMap; + final private Map encMap = Maps.newHashMap(); + + public CubeDimEncMap(CubeSegment seg) { + this.cubeDesc = seg.getCubeDesc(); + this.seg = seg; + this.dictionaryMap = null; + } + + public CubeDimEncMap(CubeDesc cubeDesc, Map> dictionaryMap) { + this.cubeDesc = cubeDesc; + this.seg = null; + this.dictionaryMap = dictionaryMap; + } + + @Override + public DimensionEncoding get(TblColRef col) { + DimensionEncoding result = encMap.get(col); + if (result == null) { + RowKeyColDesc colDesc = cubeDesc.getRowkey().getColDesc(col); + if (colDesc.isUsingDictionary()) { + // dictionary encoding + result = new DictionaryDimEnc(getDictionary(col)); + } + else { + // fixed length encoding + result = new FixedLenDimEnc(colDesc.getLength()); + } + encMap.put(col, result); + } + return result; + } + + @Override + public Dictionary getDictionary(TblColRef col) { + if (seg == null) + return dictionaryMap.get(col); + else + return seg.getDictionary(col); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java index 3510915..987fb55 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java @@ -22,8 +22,6 @@ public class RowConstants { public static final int ROWKEY_COL_DEFAULT_LENGTH = 256; - // row key fixed length place holder - public static final byte ROWKEY_PLACE_HOLDER_BYTE = 9; // row key lower bound public static final byte ROWKEY_LOWER_BYTE = 0; // row key upper bound http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java index 1d57cf9..9af1018 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java @@ -18,161 +18,46 @@ package org.apache.kylin.cube.kv; -import java.util.Arrays; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.dict.IDictionaryAware; +import org.apache.kylin.dimension.Dictionary; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DimensionEncoding; +import org.apache.kylin.dimension.IDimensionEncodingMap; import org.apache.kylin.metadata.model.TblColRef; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Read/Write column values from/into bytes * * @author yangli9 */ -@SuppressWarnings("unchecked") public class RowKeyColumnIO { - private static final Logger logger = LoggerFactory.getLogger(RowKeyColumnIO.class); - - private IDictionaryAware IDictionaryAwareness; + //private static final Logger logger = LoggerFactory.getLogger(RowKeyColumnIO.class); - public RowKeyColumnIO(IDictionaryAware IDictionaryAwareness) { - this.IDictionaryAwareness = IDictionaryAwareness; - } + private final IDimensionEncodingMap dimEncMap; - public IDictionaryAware getIDictionaryAware() { - return IDictionaryAwareness; + public RowKeyColumnIO(IDimensionEncodingMap dimEncMap) { + this.dimEncMap = dimEncMap; } public int getColumnLength(TblColRef col) { - return IDictionaryAwareness.getColumnLength(col); + return dimEncMap.get(col).getLengthOfEncoding(); } - //TODO is type cast really necessary here? public Dictionary getDictionary(TblColRef col) { - return (Dictionary) IDictionaryAwareness.getDictionary(col); - } - - public void writeColumnWithoutDictionary(byte[] src, int srcOffset, int srcLength, byte[] dst, int dstOffset, int dstLength) { - if (srcLength >= dstLength) { - System.arraycopy(src, srcOffset, dst, dstOffset, dstLength); - } else { - System.arraycopy(src, srcOffset, dst, dstOffset, srcLength); - Arrays.fill(dst, dstOffset + srcLength, dstOffset + dstLength, RowConstants.ROWKEY_PLACE_HOLDER_BYTE); - } + return dimEncMap.getDictionary(col); } - public void writeColumnWithDictionary(Dictionary dictionary, byte[] src, int srcOffset, int srcLength, byte[] dst, int dstOffset, int dstLength, int roundingFlag, int defaultValue) { - // dict value - try { - int id = dictionary.getIdFromValueBytes(src, srcOffset, srcLength, roundingFlag); - BytesUtil.writeUnsigned(id, dst, dstOffset, dictionary.getSizeOfId()); - } catch (IllegalArgumentException ex) { - Arrays.fill(dst, dstOffset, dstOffset + dstLength, (byte) defaultValue); - logger.error("Can't translate value " + Bytes.toString(src, srcOffset, srcLength) + " to dictionary ID, roundingFlag " + roundingFlag + ". Using default value " + String.format("\\x%02X", defaultValue)); - } - } - - public void writeColumn(TblColRef column, byte[] value, int valueLen, byte defaultValue, byte[] output, int outputOffset) { - writeColumn(column, value, valueLen, 0, defaultValue, output, outputOffset); - } - - public void writeColumn(TblColRef column, byte[] value, int valueLen, int roundingFlag, byte defaultValue, byte[] output, int outputOffset) { - - final Dictionary dict = getDictionary(column); - final int columnLen = getColumnLength(column); + public void writeColumn(TblColRef col, byte[] value, int valueLen, int roundingFlag, byte defaultValue, byte[] output, int outputOffset) { + DimensionEncoding dimEnc = dimEncMap.get(col); + if (dimEnc instanceof DictionaryDimEnc) + dimEnc = ((DictionaryDimEnc) dimEnc).copy(roundingFlag, defaultValue); - // non-dict value - if (dict == null) { - byte[] valueBytes = padFixLen(columnLen, value, valueLen); - System.arraycopy(valueBytes, 0, output, outputOffset, columnLen); - return; - } - - // dict value - try { - int id = dict.getIdFromValueBytes(value, 0, valueLen, roundingFlag); - BytesUtil.writeUnsigned(id, output, outputOffset, dict.getSizeOfId()); - } catch (IllegalArgumentException ex) { - for (int i = outputOffset; i < outputOffset + columnLen; i++) { - output[i] = defaultValue; - } - logger.error("Can't translate value " + Bytes.toString(value, 0, valueLen) + " to dictionary ID, roundingFlag " + roundingFlag + ". Using default value " + String.format("\\x%02X", defaultValue)); - } - } - - private byte[] padFixLen(int length, byte[] valueBytes, int valLen) { - if (valLen == length) { - return valueBytes; - } else if (valLen < length) { - byte[] newValueBytes = new byte[length]; - System.arraycopy(valueBytes, 0, newValueBytes, 0, valLen); - Arrays.fill(newValueBytes, valLen, length, RowConstants.ROWKEY_PLACE_HOLDER_BYTE); - return newValueBytes; - } else { - return Arrays.copyOf(valueBytes, length); - } + dimEnc.encode(value, valueLen, output, outputOffset); } public String readColumnString(TblColRef col, byte[] bytes, int offset, int length) { - Dictionary dict = getDictionary(col); - if (dict == null) { - if (isNull(bytes, offset, length)) { - return null; - } - bytes = removeFixLenPad(bytes, offset, length); - return Bytes.toString(bytes); - } else { - int id = BytesUtil.readUnsigned(bytes, offset, length); - try { - String value = dict.getValueFromId(id); - return value; - } catch (IllegalArgumentException e) { - logger.error("Can't get dictionary value for column " + col.getName() + " (id = " + id + ")"); - return ""; - } - } - } - - public String readColumnString(TblColRef col, byte[] bytes, int bytesLen) { - return readColumnString(col, bytes, 0, bytesLen); - } - - private boolean isNull(byte[] bytes, int offset, int length) { - // all 0xFF is NULL - if (length == 0) { - return false; - } - for (int i = 0; i < bytes.length; i++) { - if (bytes[i + offset] != AbstractRowKeyEncoder.DEFAULT_BLANK_BYTE) { - return false; - } - } - return true; - } - - private byte[] removeFixLenPad(byte[] bytes, int offset, int length) { - int padCount = 0; - for (int i = 0; i < length; i++) { - if (bytes[i + offset] == RowConstants.ROWKEY_PLACE_HOLDER_BYTE) { - padCount++; - } - } - - int size = length - padCount; - byte[] stripBytes = new byte[size]; - int index = 0; - for (int i = 0; i < length; i++) { - byte vb = bytes[i + offset]; - if (vb != RowConstants.ROWKEY_PLACE_HOLDER_BYTE) { - stripBytes[index++] = vb; - } - } - return stripBytes; + DimensionEncoding dimEnc = dimEncMap.get(col); + return dimEnc.decode(bytes, offset, length); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java index e4a6a52..d5948d4 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java @@ -46,7 +46,7 @@ public class RowKeyDecoder { public RowKeyDecoder(CubeSegment cubeSegment) { this.cubeDesc = cubeSegment.getCubeDesc(); this.rowKeySplitter = new RowKeySplitter(cubeSegment, 65, 255); - this.colIO = new RowKeyColumnIO(cubeSegment); + this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap()); this.values = new ArrayList(); } @@ -77,7 +77,7 @@ public class RowKeyDecoder { } private void collectValue(TblColRef col, byte[] valueBytes, int length) throws IOException { - String strValue = colIO.readColumnString(col, valueBytes, length); + String strValue = colIO.readColumnString(col, valueBytes, 0, length); values.add(strValue); } http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java index 990cf06..05afdbf 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java @@ -44,7 +44,7 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) { super(cubeSeg, cuboid); enableSharding = cubeSeg.isEnableSharding(); - colIO = new RowKeyColumnIO(cubeSeg); + colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap()); for (TblColRef column : cuboid.getColumns()) { bodyLength += colIO.getColumnLength(column); } @@ -160,7 +160,7 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { return; } - colIO.writeColumn(column, value, valueLen, this.blankByte, outputValue, outputValueOffset); + colIO.writeColumn(column, value, valueLen, 0, this.blankByte, outputValue, outputValueOffset); } protected byte defaultValue() { http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java index 7254df0..3f33505 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java @@ -67,17 +67,7 @@ public class RowKeyDesc { return getColDesc(col).getBitIndex(); } - /** - * caller of this method must make sure that col is NOT using dictionary - * otherwise this will always return 0 for dict columns - * @param col - * @return - */ - public int getColumnLength(TblColRef col) { - return getColDesc(col).getLength(); - } - - private RowKeyColDesc getColDesc(TblColRef col) { + public RowKeyColDesc getColDesc(TblColRef col) { RowKeyColDesc desc = columnMap.get(col); if (desc == null) throw new NullPointerException("Column " + col + " does not exist in row key desc"); http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java index bcb2caf..7392e4c 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java @@ -43,7 +43,6 @@ import javax.annotation.Nullable; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; @@ -54,6 +53,7 @@ import org.apache.kylin.dict.DictionaryGenerator; import org.apache.kylin.dict.DictionaryInfo; import org.apache.kylin.dict.DictionaryManager; import org.apache.kylin.dict.IterableDictionaryValueEnumerator; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.source.ReadableTable; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java b/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java index c0fe939..57e0d8b 100644 --- a/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java @@ -19,18 +19,12 @@ package org.apache.kylin.gridtable; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.DimensionEncoding; public class DefaultGTComparator implements IGTComparator { @Override public boolean isNull(ByteArray code) { - // all 0xff is null - byte[] array = code.array(); - for (int i = 0, j = code.offset(), n = code.length(); i < n; i++, j++) { - if (array[j] != Dictionary.NULL) - return false; - } - return true; + return DimensionEncoding.isNull(code.array(), code.offset(), code.length()); } @Override http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java index e4dd8f5..4a9c2d3 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java @@ -23,12 +23,12 @@ import static org.junit.Assert.assertTrue; import java.util.HashSet; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.dict.DictionaryInfo; import org.apache.kylin.dict.DictionaryManager; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.TblColRef; import org.junit.After; import org.junit.Before; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java index c25bad7..8d8366e 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java @@ -26,10 +26,10 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.TblColRef; import org.junit.AfterClass; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java index 832584c..80e3df1 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java @@ -33,10 +33,10 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.TblColRef; import org.junit.AfterClass; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java index d96de4f..88573c6 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java @@ -33,7 +33,6 @@ import java.util.concurrent.Future; import org.apache.commons.io.FileUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; @@ -42,6 +41,7 @@ import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; import org.apache.kylin.dict.DictionaryGenerator; import org.apache.kylin.dict.IterableDictionaryValueEnumerator; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java index 674aa15..517299f 100644 --- a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java +++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java @@ -17,7 +17,7 @@ package org.apache.kylin.gridtable; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; import java.io.IOException; import java.math.BigDecimal; @@ -25,17 +25,18 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.BitSet; import java.util.List; -import java.util.Map; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.BytesSerializer; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.gridtable.CubeCodeSystem; import org.apache.kylin.dict.NumberDictionaryBuilder; import org.apache.kylin.dict.StringBytesConverter; import org.apache.kylin.dict.TrieDictionaryBuilder; +import org.apache.kylin.dimension.Dictionary; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.GTInfo.Builder; import org.apache.kylin.gridtable.memstore.GTSimpleMemStore; import org.apache.kylin.metadata.datatype.DataType; @@ -55,7 +56,6 @@ import org.junit.Before; import org.junit.Test; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; public class DictGridTableTest { @@ -466,12 +466,12 @@ public class DictGridTableTest { return info; } - @SuppressWarnings("rawtypes") + @SuppressWarnings("unchecked") private static CubeCodeSystem newDictCodeSystem() { - Map dictionaryMap = Maps.newHashMap(); - dictionaryMap.put(1, newDictionaryOfInteger()); - dictionaryMap.put(2, newDictionaryOfString()); - return new CubeCodeSystem(dictionaryMap); + DimensionEncoding[] dimEncs = new DimensionEncoding[3]; + dimEncs[1] = new DictionaryDimEnc(newDictionaryOfInteger()); + dimEncs[2] = new DictionaryDimEnc(newDictionaryOfString()); + return new CubeCodeSystem(dimEncs); } @SuppressWarnings("rawtypes") http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java index 62b06aa..b666229 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java @@ -30,7 +30,7 @@ import java.io.UnsupportedEncodingException; import java.util.Date; import org.apache.commons.lang.StringUtils; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; /** * A dictionary for date string (date only, no time). http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java index 158ce86..06758c0 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java @@ -21,7 +21,7 @@ package org.apache.kylin.dict; import java.nio.ByteBuffer; import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.metadata.filter.IFilterCodeSystem; /** @@ -44,7 +44,7 @@ public class DictCodeSystem implements IFilterCodeSystem { String v = value; for (int i = 0, n = v.length(); i < n; i++) { - if ((byte) v.charAt(i) != Dictionary.NULL) + if ((byte) v.charAt(i) != DimensionEncoding.NULL) return false; } return true; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java index 4b01e60..df6781a 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java @@ -27,8 +27,8 @@ import java.util.List; import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.source.ReadableTable; import org.slf4j.Logger; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java index 4fba59a..f38c95f 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java @@ -20,7 +20,7 @@ package org.apache.kylin.dict; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.source.ReadableTable.TableSignature; import com.fasterxml.jackson.annotation.JsonAutoDetect; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java index 69b29fe..47844be 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java @@ -24,8 +24,8 @@ import java.io.IOException; import org.apache.kylin.common.persistence.Serializer; import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.dimension.Dictionary; /** * @author yangli9 http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index d49e43d..ce04b55 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -31,7 +31,7 @@ import java.util.concurrent.TimeUnit; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.DataModelDesc; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java index 72dc995..6d51f4a 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java @@ -28,7 +28,7 @@ import java.io.OutputStream; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; /** */ http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java deleted file mode 100644 index 4586163..0000000 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.metadata.model.TblColRef; - -/** - * - * Class that implement this interface has the ability to help dictionary encoding and decoding - */ -public interface IDictionaryAware { - - public abstract int getColumnLength(TblColRef col); - - public abstract Dictionary getDictionary(TblColRef col); - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java index df7b1c6..96448a4 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java @@ -21,7 +21,7 @@ package org.apache.kylin.dict; import com.google.common.collect.Lists; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import java.io.IOException; import java.util.List; http://git-wip-us.apache.org/repos/asf/kylin/blob/75027ed6/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java index 3c96d08..ab38b13 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java @@ -25,7 +25,7 @@ import java.io.PrintStream; import java.io.UnsupportedEncodingException; import org.apache.kylin.common.util.DateFormat; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; /** */