kylin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From liy...@apache.org
Subject kylin git commit: KYLIN-2322 fix TictionaryDictionaryForest cache bug and add CacheDictionary interface
Date Tue, 03 Jan 2017 07:43:34 GMT
Repository: kylin
Updated Branches:
  refs/heads/master d7971d9bd -> 1d6a36bf6


KYLIN-2322 fix TictionaryDictionaryForest cache bug and add CacheDictionary interface

Signed-off-by: Li Yang <liyang@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1d6a36bf
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1d6a36bf
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1d6a36bf

Branch: refs/heads/master
Commit: 1d6a36bf66085e0bed79876379b21677ec55788f
Parents: d7971d9
Author: xiefan46 <958034172@qq.com>
Authored: Wed Dec 28 18:10:07 2016 +0800
Committer: Li Yang <liyang@apache.org>
Committed: Tue Jan 3 15:43:24 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    |   2 +-
 .../apache/kylin/common/util/Dictionary.java    |   2 +
 .../apache/kylin/dict/AppendTrieDictionary.java |  46 +----
 .../org/apache/kylin/dict/CacheDictionary.java  | 107 +++++++++++
 .../apache/kylin/dict/DateStrDictionary.java    |   1 +
 .../org/apache/kylin/dict/NumberDictionary.java |  26 +--
 .../apache/kylin/dict/TimeStrDictionary.java    |   1 +
 .../org/apache/kylin/dict/TrieDictionary.java   | 137 +-------------
 .../apache/kylin/dict/TrieDictionaryForest.java |  45 +----
 .../kylin/dict/TrieDictionaryForestBuilder.java |   1 -
 .../MultipleDictionaryValueEnumeratorTest.java  |   1 +
 .../dict/TrieDictionaryForestBenchmark.java     | 180 +++++++++++++++++++
 .../kylin/dict/TrieDictionaryForestTest.java    |  35 ++--
 13 files changed, 325 insertions(+), 259 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index d73b694..bb8880b 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -217,7 +217,7 @@ abstract public class KylinConfigBase implements Serializable {
     // ============================================================================
 
     public boolean isUseForestTrieDictionary() {
-        return Boolean.parseBoolean(getOptional("kylin.dictionary.use-forest-trie", "false"));
+        return Boolean.parseBoolean(getOptional("kylin.dictionary.use-forest-trie", "true"));
     }
 
     public int getTrieDictionaryForestMaxTrieSizeMB() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java b/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
index 1e172bc..03996a7 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java
@@ -254,4 +254,6 @@ abstract public class Dictionary<T> implements Serializable {
      */
     public abstract void readFields(DataInput in) throws IOException;
 
+
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
b/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
index 32bfde6..503c29e 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java
@@ -27,10 +27,8 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
-import java.lang.ref.SoftReference;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.LinkedList;
@@ -76,7 +74,7 @@ import org.slf4j.LoggerFactory;
  * @author sunyerui
  */
 @SuppressWarnings({ "rawtypes", "unchecked", "serial" })
-public class AppendTrieDictionary<T> extends Dictionary<T> {
+public class AppendTrieDictionary<T> extends CacheDictionary<T> {
 
     public static final byte[] HEAD_MAGIC = new byte[] { 0x41, 0x70, 0x70, 0x65, 0x63, 0x64,
0x54, 0x72, 0x69, 0x65, 0x44, 0x69, 0x63, 0x74 }; // "AppendTrieDict"
     public static final int HEAD_SIZE_I = HEAD_MAGIC.length;
@@ -87,22 +85,16 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
     private static final Logger logger = LoggerFactory.getLogger(AppendTrieDictionary.class);
 
     transient private String baseDir;
-    transient private int baseId;
     transient private int maxId;
     transient private int maxValueLength;
     transient private int nValues;
-    transient private BytesConverter<T> bytesConverter;
 
     volatile private TreeMap<DictSliceKey, DictSlice> dictSliceMap;
 
-    transient private boolean enableValueCache = true;
-    transient private SoftReference<HashMap> valueToIdCache;
 
     // Constructor both for build and deserialize
     public AppendTrieDictionary() {
-        if (enableValueCache) {
-            valueToIdCache = new SoftReference<>(new HashMap());
-        }
+        enableCache();
     }
 
     public void initParams(String baseDir, int baseId, int maxId, int maxValueLength, int
nValues, BytesConverter bytesConverter) throws IOException {
@@ -111,7 +103,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
         this.maxId = maxId;
         this.maxValueLength = maxValueLength;
         this.nValues = nValues;
-        this.bytesConverter = bytesConverter;
+        this.bytesConvert = bytesConverter;
     }
 
     public void initDictSliceMap(CachedTreeMap dictMap) throws IOException {
@@ -893,7 +885,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
             } else {
                 logger.info("GlobalDict {} exist, append value", resourcePath);
                 builder = new Builder<>(resourcePath, dictToUse, dictToUse.baseDir,
dictToUse.maxId, dictToUse.maxValueLength,
-                    dictToUse.nValues, dictToUse.bytesConverter, dictToUse.writeDictMap());
+                    dictToUse.nValues, dictToUse.bytesConvert, dictToUse.writeDictMap());
             }
 
             return builder;
@@ -1156,31 +1148,6 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
         return maxValueLength;
     }
 
-    @Override
-    final protected int getIdFromValueImpl(T value, int roundingFlag) {
-        if (enableValueCache && roundingFlag == 0) {
-            HashMap cache = valueToIdCache.get(); // SoftReference to skip cache gracefully
when short of memory
-            if (cache != null) {
-                Integer id = null;
-                id = (Integer) cache.get(value);
-                if (id != null)
-                    return id.intValue();
-
-                byte[] valueBytes = bytesConverter.convertToBytes(value);
-                id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
-
-                cache.put(value, id);
-                return id;
-            }
-        }
-        byte[] valueBytes = bytesConverter.convertToBytes(value);
-        return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
-    }
-
-    @Override
-    final protected T getValueFromIdImpl(int id) {
-        throw new UnsupportedOperationException("AppendTrieDictionary can't retrive value
from id");
-    }
 
     @Override
     protected byte[] getValueBytesFromIdImpl(int id) {
@@ -1198,7 +1165,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
             indexOut.writeInt(maxId);
             indexOut.writeInt(maxValueLength);
             indexOut.writeInt(nValues);
-            indexOut.writeUTF(bytesConverter.getClass().getName());
+            indexOut.writeUTF(bytesConvert.getClass().getName());
             dictSliceMap.write(indexOut);
             dictSliceMap.commit(keepAppend);
         }
@@ -1208,7 +1175,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
     public AppendTrieDictionary copyToAnotherMeta(KylinConfig srcConfig, KylinConfig dstConfig)
throws IOException {
         Configuration conf = new Configuration();
         AppendTrieDictionary newDict = new AppendTrieDictionary();
-        newDict.initParams(baseDir.replaceFirst(srcConfig.getHdfsWorkingDirectory(), dstConfig.getHdfsWorkingDirectory()),
baseId, maxId, maxValueLength, nValues, bytesConverter);
+        newDict.initParams(baseDir.replaceFirst(srcConfig.getHdfsWorkingDirectory(), dstConfig.getHdfsWorkingDirectory()),
baseId, maxId, maxValueLength, nValues, bytesConvert);
         newDict.initDictSliceMap((CachedTreeMap)dictSliceMap);
         logger.info("Copy AppendDict from {} to {}", this.baseDir, newDict.baseDir);
         Path srcPath = new Path(this.baseDir);
@@ -1256,6 +1223,7 @@ public class AppendTrieDictionary<T> extends Dictionary<T>
{
         }
     }
 
+
     @Override
     public void dump(PrintStream out) {
         out.println("Total " + nValues + " values, " + (dictSliceMap == null ? 0 : dictSliceMap.size())
+ " slice");

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
new file mode 100644
index 0000000..575358e
--- /dev/null
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import org.apache.kylin.common.util.Dictionary;
+
+import java.lang.ref.SoftReference;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Created by xiefan on 16-12-30.
+ */
+abstract public class CacheDictionary<T> extends Dictionary<T> {
+    private static final long serialVersionUID = 1L;
+
+    transient protected boolean enableValueCache = true;
+
+    transient private SoftReference<Map> valueToIdCache;
+
+    transient private SoftReference<Object[]> idToValueCache;
+
+    transient protected int baseId;
+
+    transient protected BytesConverter<T> bytesConvert;
+
+    public CacheDictionary() {
+
+    }
+
+    //value --> id
+    @Override
+    final protected int getIdFromValueImpl(T value, int roundingFlag) {
+        if (enableValueCache && roundingFlag == 0) {
+            Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when
short of memory
+            if (cache != null) {
+                Integer id = null;
+                id = (Integer) cache.get(value);
+                if (id != null)
+                    return id.intValue();
+                byte[] valueBytes = bytesConvert.convertToBytes(value);
+                id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
+                cache.put(value, id);
+                return id;
+            }
+        }
+        byte[] valueBytes = bytesConvert.convertToBytes(value);
+        return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
+    }
+
+    //id --> value
+    @Override
+    final protected T getValueFromIdImpl(int id) {
+        if (enableValueCache) {
+            Object[] cache = idToValueCache.get();
+            if (cache != null) {
+                int seq = calcSeqNoFromId(id);
+                if (cache[seq] != null)
+                    return (T) cache[seq];
+                byte[] valueBytes = getValueBytesFromIdImpl(id);
+                T value = bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length);
+                cache[seq] = value;
+                return value;
+            }
+        }
+        byte[] valueBytes = getValueBytesFromIdImpl(id);
+        return bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length);
+    }
+
+    final protected int calcSeqNoFromId(int id) {
+        int seq = id - baseId;
+        if (seq < 0 || seq >= getSize()) {
+            throw new IllegalArgumentException("Not a valid ID: " + id);
+        }
+        return seq;
+    }
+
+    final public void enableCache() {
+        this.enableValueCache = true;
+        if (this.valueToIdCache == null)
+            this.valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap());
+        if (this.idToValueCache == null)
+            this.idToValueCache = new SoftReference<Object[]>(new Object[getSize()]);
+    }
+
+    final public void disableCache() {
+        this.enableValueCache = false;
+        this.valueToIdCache = null;
+        this.idToValueCache = null;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
index ee8534f..29bbee2 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java
@@ -156,6 +156,7 @@ public class DateStrDictionary extends Dictionary<String> {
         init(pattern, baseId);
     }
 
+
     @Override
     public int hashCode() {
         return 31 * baseId + pattern.hashCode();

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
index 12efbd3..f1b1b3d 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java
@@ -194,30 +194,6 @@ public class NumberDictionary<T> extends TrieDictionary<T>
{
         return codec.decodeNumber(returnValue, offset);
     }
 
-    @Override
-    public void enableIdToValueBytesCache() {
-        enableIdToValueBytesCache(new EnableIdToValueBytesCacheVisitor() {
-            NumberBytesCodec codec = getCodec();
-            byte[] tmp = new byte[getSizeOfValue()];
-
-            @Override
-            public byte[] getBuffer() {
-                return codec.buf;
-            }
-
-            @Override
-            public byte[] makeValueBytes(byte[] buf, int length) {
-                // the given buf is the codec buf, which we returned in getBuffer()
-                codec.bufOffset = 0;
-                codec.bufLen = length;
-                int numLen = codec.decodeNumber(tmp, 0);
-
-                byte[] result = new byte[numLen];
-                System.arraycopy(tmp, 0, result, 0, numLen);
-                return result;
-            }
-        });
-    }
 
     public static void main(String[] args) throws Exception {
         NumberDictionaryBuilder<String> b = new NumberDictionaryBuilder<String>(new
StringBytesConverter());
@@ -227,7 +203,7 @@ public class NumberDictionary<T> extends TrieDictionary<T>
{
         b.addValue("7");
         TrieDictionary<String> dict = b.build(0);
 
-        dict.enableIdToValueBytesCache();
+        //dict.enableIdToValueBytesCache();
         for (int i = 0; i <= dict.getMaxId(); i++) {
             System.out.println(Bytes.toString(dict.getValueBytesFromId(i)));
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
index fc3db5f..eabc9f1 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java
@@ -147,4 +147,5 @@ public class TimeStrDictionary extends Dictionary<String> {
     @Override
     public void readFields(DataInput in) throws IOException {
     }
+
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
index c099de0..957207e 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java
@@ -27,10 +27,8 @@ import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.PrintStream;
-import java.lang.ref.SoftReference;
 import java.util.Arrays;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
+
 
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.BytesUtil;
@@ -55,7 +53,7 @@ import com.google.common.base.Preconditions;
  * @author yangli9
  */
 @SuppressWarnings({ "rawtypes", "unchecked" })
-public class TrieDictionary<T> extends Dictionary<T> {
+public class TrieDictionary<T> extends CacheDictionary<T> {
     private static final long serialVersionUID = 1L;
 
     public static final byte[] MAGIC = new byte[] { 0x54, 0x72, 0x69, 0x65, 0x44, 0x69, 0x63,
0x74 }; // "TrieDict"
@@ -74,21 +72,13 @@ public class TrieDictionary<T> extends Dictionary<T> {
     transient private int bodyLen;
     transient private int sizeChildOffset;
     transient private int sizeNoValuesBeneath;
-    transient private int baseId;
     transient private int maxValueLength;
-    transient private BytesConverter<T> bytesConvert;
 
     transient private int nValues;
     transient private int sizeOfId;
     transient private long childOffsetMask;
     transient private int firstByteOffset;
 
-    transient private boolean enableValueCache = true;
-    transient private SoftReference<Map> valueToIdCache;
-    transient private SoftReference<Object[]> idToValueCache;
-
-    transient private boolean enableIdToValueBytesCache = false;
-    transient private byte[][] idToValueBytesCache;
 
     public TrieDictionary() { // default constructor for Writable interface
     }
@@ -120,16 +110,13 @@ public class TrieDictionary<T> extends Dictionary<T> {
             this.sizeOfId = BytesUtil.sizeForValue(baseId + nValues + 1); // note baseId
could raise 1 byte in ID space, +1 to reserve all 0xFF for NULL case
             this.childOffsetMask = ~((long) (BIT_IS_LAST_CHILD | BIT_IS_END_OF_VALUE) <<
((sizeChildOffset - 1) * 8));
             this.firstByteOffset = sizeChildOffset + sizeNoValuesBeneath + 1; // the offset
from begin of node to its first value byte
+            enableCache();
         } catch (Exception e) {
             if (e instanceof RuntimeException)
                 throw (RuntimeException) e;
             else
                 throw new RuntimeException(e);
         }
-        if (enableValueCache) {
-            valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap());
-            idToValueCache = new SoftReference<Object[]>(new Object[nValues]);
-        }
     }
 
     @Override
@@ -152,26 +139,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
         return maxValueLength;
     }
 
-    @Override
-    final protected int getIdFromValueImpl(T value, int roundingFlag) {
-        if (enableValueCache && roundingFlag == 0) {
-            Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when
short of memory
-            if (cache != null) {
-                Integer id = null;
-                id = (Integer) cache.get(value);
-                if (id != null)
-                    return id.intValue();
-
-                byte[] valueBytes = bytesConvert.convertToBytes(value);
-                id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
-
-                cache.put(value, id);
-                return id;
-            }
-        }
-        byte[] valueBytes = bytesConvert.convertToBytes(value);
-        return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag);
-    }
 
     @Override
     protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag)
{
@@ -267,35 +234,9 @@ public class TrieDictionary<T> extends Dictionary<T> {
             return k;
     }
 
-    @Override
-    final protected T getValueFromIdImpl(int id) {
-        if (enableValueCache) {
-            Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully
when short of memory
-            if (cache != null) {
-                int seq = calcSeqNoFromId(id);
-                if (cache[seq] != null)
-                    return (T) cache[seq];
-
-                byte[] value = new byte[getSizeOfValue()];
-                int length = getValueBytesFromId(id, value, 0);
-                T result = bytesConvert.convertFromBytes(value, 0, length);
-
-                cache[seq] = result;
-                return result;
-            }
-        }
-        byte[] value = new byte[getSizeOfValue()];
-        int length = getValueBytesFromId(id, value, 0);
-        return bytesConvert.convertFromBytes(value, 0, length);
-    }
 
     @Override
     protected byte[] getValueBytesFromIdImpl(int id) {
-        if (enableIdToValueBytesCache) {
-            int seq = calcSeqNoFromId(id);
-            return idToValueBytesCache[seq];
-        }
-
         byte[] buf = new byte[maxValueLength];
         int len = getValueBytesFromIdImpl(id, buf, 0);
 
@@ -363,68 +304,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
         }
     }
 
-    public void enableIdToValueBytesCache() {
-        enableIdToValueBytesCache(new EnableIdToValueBytesCacheVisitor() {
-            @Override
-            public byte[] getBuffer() {
-                return new byte[getSizeOfValue()];
-            }
-
-            @Override
-            public byte[] makeValueBytes(byte[] buf, int length) {
-                byte[] valueBytes = new byte[length];
-                System.arraycopy(buf, 0, valueBytes, 0, length);
-                return valueBytes;
-            }
-        });
-    }
-
-    interface EnableIdToValueBytesCacheVisitor {
-        byte[] getBuffer();
-
-        byte[] makeValueBytes(byte[] buf, int length);
-    }
-
-    protected void enableIdToValueBytesCache(EnableIdToValueBytesCacheVisitor visitor) {
-        enableIdToValueBytesCache = true;
-        idToValueBytesCache = new byte[nValues][];
-        enableIdToValueBytesCache_recursion(headSize, 0, visitor.getBuffer(), 0, visitor);
-    }
-
-    private void enableIdToValueBytesCache_recursion(int n, int seq, byte[] buf, int tail,
EnableIdToValueBytesCacheVisitor visitor) {
-        // write current node value
-        int p = n + firstByteOffset;
-        int len = BytesUtil.readUnsigned(trieBytes, p - 1, 1);
-        System.arraycopy(trieBytes, p, buf, tail, len);
-        tail += len;
-
-        // if the value is ended
-        boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE);
-        if (isEndOfValue) {
-            idToValueBytesCache[seq] = visitor.makeValueBytes(buf, tail);
-            seq++;
-        }
-
-        // find a child to continue
-        int c = getChildOffset(n);
-        if (c == headSize) // has no children 
-            return;
-
-        // process each child
-        while (true) {
-            enableIdToValueBytesCache_recursion(c, seq, buf, tail, visitor);
-
-            int nValuesBeneath = BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath);
-            seq += nValuesBeneath;
-
-            // go next child
-            if (checkFlag(c, BIT_IS_LAST_CHILD))
-                break; // no more child? we are done
-            p = c + firstByteOffset;
-            c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1);
-        }
-    }
-
     private boolean checkFlag(int offset, int bit) {
         return (trieBytes[offset] & bit) > 0;
     }
@@ -436,14 +315,6 @@ public class TrieDictionary<T> extends Dictionary<T> {
             return baseId + seq;
     }
 
-    private int calcSeqNoFromId(int id) {
-        int seq = id - baseId;
-        if (seq < 0 || seq >= nValues) {
-            throw new IllegalArgumentException("Not a valid ID: " + id);
-        }
-        return seq;
-    }
-
     @Override
     public void write(DataOutput out) throws IOException {
         out.write(trieBytes);
@@ -552,7 +423,7 @@ public class TrieDictionary<T> extends Dictionary<T> {
         Preconditions.checkArgument(dict2.contains(dict));
         Preconditions.checkArgument(dict.equals(dict2));
 
-        dict2.enableIdToValueBytesCache();
+        //dict2.enableIdToValueBytesCache();
         for (int i = 0; i <= dict.getMaxId(); i++) {
             System.out.println(Bytes.toString(dict.getValueBytesFromId(i)));
         }

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index e746348..c655854 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
+
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.BytesUtil;
@@ -40,18 +41,14 @@ import org.apache.kylin.common.util.Dictionary;
  * <p>
  * Created by xiefan on 16-10-26.
  */
-public class TrieDictionaryForest<T> extends Dictionary<T> {
+public class TrieDictionaryForest<T> extends CacheDictionary<T> {
     private static final long serialVersionUID = 1L;
 
     private ArrayList<TrieDictionary<T>> trees;
 
     private ArrayList<ByteArray> valueDivide;
 
-    private ArrayList<Integer> accuOffset; //find tree
-
-    private BytesConverter<T> bytesConvert;
-
-    private int baseId;
+    private ArrayList<Integer> accuOffset;
 
     private ArrayList<ByteArray> maxValue;
 
@@ -66,6 +63,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> {
         this.bytesConvert = bytesConverter;
         this.baseId = baseId;
         initMaxValue();
+        enableCache();
     }
 
     @Override
@@ -102,12 +100,6 @@ public class TrieDictionaryForest<T> extends Dictionary<T>
{
         return maxValue;
     }
 
-    // value --> id
-    @Override
-    protected int getIdFromValueImpl(T value, int roundingFlag) throws IllegalArgumentException
{
-        byte[] valueBytes = bytesConvert.convertToBytes(value);
-        return getIdFromValueBytesImpl(valueBytes, 0, valueBytes.length, roundingFlag);
-    }
 
     @Override
     protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag)
throws IllegalArgumentException {
@@ -148,15 +140,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T>
{
         return id;
     }
 
-    @Override
-    protected T getValueFromIdImpl(int id) throws IllegalArgumentException {
-        byte[] data = getValueBytesFromIdImpl(id);
-        if (data != null) {
-            return bytesConvert.convertFromBytes(data, 0, data.length);
-        } else {
-            return null;
-        }
-    }
+
 
     @Override
     protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) throws
IllegalArgumentException {
@@ -271,6 +255,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T>
{
                 trees.add(dict);
             }
             initMaxValue();
+            enableCache();
         } catch (Exception e) {
             if (e instanceof RuntimeException)
                 throw (RuntimeException) e;
@@ -383,22 +368,4 @@ public class TrieDictionaryForest<T> extends Dictionary<T>
{
         }
     }
 
-    public static void main(String[] args) {
-        ArrayList<String> list = new ArrayList<>();
-        list.add("一");
-        list.add("二");
-        list.add("三");
-        list.add("");
-        list.add("part");
-        list.add("par");
-        list.add("partition");
-        list.add("party");
-        list.add("parties");
-        list.add("paint");
-        Collections.sort(list);
-        for (String str : list) {
-            System.out.println("found value:" + str + " index:" + lowerBound(str, list));
-        }
-    }
-
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
index 4ee30f0..af2e302 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
@@ -111,7 +111,6 @@ public class TrieDictionaryForestBuilder<T> {
             reset();
         }
         TrieDictionaryForest<T> forest = new TrieDictionaryForest<T>(this.trees,
this.valueDivide, this.accuOffset, this.bytesConverter, baseId);
-
         // if input values are not in ascending order and tree num>1,TrieDictionaryForest
can not work correctly.
         if (forest.getTrees().size() > 1 && !isOrdered) {
             throw new IllegalStateException("Invalid input data. Unordered data can not be
split into multi trees");

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
index ad166c2..73e0935 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/MultipleDictionaryValueEnumeratorTest.java
@@ -158,6 +158,7 @@ public class MultipleDictionaryValueEnumeratorTest {
         @Override
         public void readFields(DataInput in) throws IOException {}
 
+
         @Override
         public boolean contains(Dictionary another) {
             return false;

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
new file mode 100644
index 0000000..0b4c0e3
--- /dev/null
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestBenchmark.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import org.apache.kylin.common.util.Dictionary;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+/**
+ * Created by xiefan on 16-12-28.
+ */
+@Ignore
+public class TrieDictionaryForestBenchmark {
+
+    private static final Random rand = new Random(System.currentTimeMillis());
+
+    private CacheDictionary<String> oldDict;
+
+    private CacheDictionary<String> newDict;
+
+    private ArrayList<String> rawData;
+
+    private int cardnality = 100;
+
+    private int testTimes = 100000;
+
+    @Before
+    public void before() {
+        int dataSize = 100 * 10000;
+        TrieDictionaryBuilder<String> b1 = new TrieDictionaryBuilder<>(new StringBytesConverter());
+        TrieDictionaryForestBuilder<String> b2 = new TrieDictionaryForestBuilder<String>(new
StringBytesConverter(), 0, 5);
+        this.rawData = genStringDataSet(dataSize);
+        for (String str : this.rawData) {
+            b1.addValue(str);
+            b2.addValue(str);
+        }
+        this.oldDict = b1.build(0);
+        this.newDict = b2.build();
+        System.out.println("new dict split tree size : " + ((TrieDictionaryForest<String>)
newDict).getTrees().size());
+    }
+
+    @Test
+    public void testAll() {
+        benchmarkWithoutCache();
+        benchmarkWithCache();
+    }
+
+    @Test
+    public void benchmarkWithoutCache() {
+        oldDict.disableCache();
+        newDict.disableCache();
+        runBenchmark("benchmarkWithoutCache");
+    }
+
+    @Test
+    public void benchmarkWithCache() {
+        oldDict.enableCache();
+        newDict.enableCache();
+        runBenchmark("benchmarkWithCache");
+    }
+
+    private void runBenchmark(String testName) {
+        long oldTime = runQueryValue(oldDict, cardnality, testTimes);
+        long oldTime2 = runQueryId(rawData, oldDict, cardnality, testTimes);
+        long oldTime3 = runQueryValueBytes(oldDict, cardnality, testTimes);
+        long oldTime4 = runQueryValueBytes2(oldDict, cardnality, testTimes);
+        long oldTime5 = runQueryIdByValueBytes(rawData, oldDict, cardnality, testTimes);
+        long newTime = runQueryValue(newDict, cardnality, testTimes);
+        long newTime2 = runQueryId(rawData, newDict, cardnality, testTimes);
+        long newTime3 = runQueryValueBytes(newDict, cardnality, testTimes);
+        long newTime4 = runQueryValueBytes2(newDict, cardnality, testTimes);
+        long newTime5 = runQueryIdByValueBytes(rawData, newDict, cardnality, testTimes);
+        System.out.println(testName);
+        System.out.println("old dict value --> id : " + oldTime2);
+        System.out.println("new dict value --> id :" + newTime2);
+        System.out.println("old dict value bytes --> id : " + oldTime5);
+        System.out.println("new dict value bytes--> id :" + newTime5);
+        System.out.println("old dict id --> value : " + oldTime);
+        System.out.println("new dict id --> value : " + newTime);
+        System.out.println("old dict id --> value bytes : " + oldTime3);
+        System.out.println("new dict id --> value bytes : " + newTime3);
+        System.out.println("old dict id --> value bytes (method 2): " + oldTime4);
+        System.out.println("new dict id --> value bytes (method 2): " + newTime4);
+    }
+
+    //id -- value
+    private long runQueryValue(Dictionary<String> dict, int cardnality, int testTimes)
{
+        long startTime = System.currentTimeMillis();
+        int step = 1;
+        for (int i = 0; i < testTimes; i++) {
+            for (int j = 0; j < cardnality; j++) {
+                step |= dict.getValueFromId(j).length();
+            }
+        }
+        return System.currentTimeMillis() - startTime;
+    }
+
+    private long runQueryValueBytes(Dictionary<String> dict, int cardnality, int testTimes)
{
+        long startTime = System.currentTimeMillis();
+        int step = 1;
+        for (int i = 0; i < testTimes; i++) {
+            for (int j = 0; j < cardnality; j++) {
+                step |= dict.getValueBytesFromId(j).length;
+            }
+        }
+        return System.currentTimeMillis() - startTime;
+    }
+
+    private long runQueryValueBytes2(Dictionary<String> dict, int cardnality, int testTimes)
{
+        long startTime = System.currentTimeMillis();
+        int step = 1;
+        byte[] returnValue = new byte[2048];
+        for (int i = 0; i < testTimes; i++) {
+            for (int j = 0; j < cardnality; j++) {
+                int size = dict.getValueBytesFromId(j, returnValue, 0);
+                step |= size;
+            }
+        }
+        return System.currentTimeMillis() - startTime;
+    }
+
+    private long runQueryId(ArrayList<String> rawData, Dictionary<String> dict,
int cardnality, int testTimes) {
+        long startTime = System.currentTimeMillis();
+        int step = 1;
+        for (int i = 0; i < testTimes; i++) {
+            for (int j = 0; j < cardnality; j++) {
+                step |= dict.getIdFromValue(rawData.get(j));
+            }
+        }
+        return System.currentTimeMillis() - startTime;
+    }
+
+    private long runQueryIdByValueBytes(ArrayList<String> rawData, Dictionary<String>
dict, int cardnality, int testTimes) {
+        List<byte[]> testBytes = new ArrayList<>();
+        StringBytesConverter converter = new StringBytesConverter();
+        for (int i = 0; i < cardnality; i++) {
+            testBytes.add(converter.convertToBytes(rawData.get(i)));
+        }
+        long startTime = System.currentTimeMillis();
+        int step = 1;
+        for (int i = 0; i < testTimes; i++) {
+            for (int j = 0; j < cardnality; j++) {
+                step |= dict.getIdFromValueBytes(testBytes.get(j), 0, testBytes.get(j).length);
+            }
+        }
+        return System.currentTimeMillis() - startTime;
+    }
+
+    private ArrayList<String> genStringDataSet(int totalSize) {
+        ArrayList<String> data = new ArrayList<>();
+        for (int i = 0; i < totalSize; i++) {
+            data.add(UUID.randomUUID().toString());
+        }
+        Collections.sort(data);
+        return data;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/1d6a36bf/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index ee092c9..68cf301 100755
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -58,7 +58,7 @@ public class TrieDictionaryForestTest {
         TrieDictionaryForest<String> dict = builder.build();
         assertSameBehaviorAsTrie(dict, strs, 0);
     }
-    
+
     @Test
     public void testBasicFound() {
         ArrayList<String> strs = new ArrayList<String>();
@@ -106,7 +106,7 @@ public class TrieDictionaryForestTest {
             assertEquals(expectId, dict.getIdFromValue(s));
             expectId++;
         }
-        
+
         assertSameBehaviorAsTrie(dict, strs, baseId);
     }
 
@@ -128,7 +128,7 @@ public class TrieDictionaryForestTest {
         assertEquals(255, id);
         id = dict.getIdFromValue(null, -1);
         assertEquals(255, id);
-        
+
         assertSameBehaviorAsTrie(dict, strs, 0);
     }
 
@@ -263,7 +263,6 @@ public class TrieDictionaryForestTest {
     }
 
     @Test
-    @Ignore
     public void categoryNamesTest() throws Exception {
         InputStream is = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
         ArrayList<String> str = loadStrings(is);
@@ -284,19 +283,19 @@ public class TrieDictionaryForestTest {
     }
 
     @Test
-    public void emptyDictTest() throws Exception{
+    public void emptyDictTest() throws Exception {
         TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new
StringBytesConverter());
         TrieDictionaryForest<String> dict = b.build();
-        try{
+        try {
             int id = dict.getIdFromValue("123", 0);
             fail("id should not exist");
-        }catch (IllegalArgumentException e){
+        } catch (IllegalArgumentException e) {
             //right
         }
-        try{
+        try {
             String value = dict.getValueFromIdImpl(123);
             fail("value should not exist");
-        }catch (IllegalArgumentException e){
+        } catch (IllegalArgumentException e) {
             //right
         }
     }
@@ -732,12 +731,6 @@ public class TrieDictionaryForestTest {
         System.out.println("compare build time.  Old trie : " + oldDictTotalBuildTime / 1000.0
+ "s.New trie : " + newDictTotalBuildTime / 1000.0 + "s");
     }
 
-    @Test
-    public void queryTimeBenchmarkTest() throws Exception {
-        int count = (int) (Integer.MAX_VALUE * 0.8 / 640);
-        benchmarkStringDictionary(new RandomStrings(count));
-    }
-
     private void evaluateDataSize(ArrayList<String> list) {
         long size = 0;
         for (String str : list)
@@ -867,7 +860,6 @@ public class TrieDictionaryForestTest {
         int baseId = new Random().nextInt(100);
         TrieDictionaryForestBuilder<String> b = newDictBuilder(str, baseId, 2);
         TrieDictionaryForest<String> dict = b.build();
-        //dict.dump(System.out);
         TreeSet<String> set = new TreeSet<String>();
         for (String s : str) {
             set.add(s);
@@ -881,7 +873,7 @@ public class TrieDictionaryForestTest {
         int id = baseId;
         for (; it.hasNext(); id++) {
             String value = it.next();
-            // System.out.println("checking " + id + " <==> " + value);
+            //System.out.println("checking " + id + " <==> " + value);
 
             assertEquals(id, dict.getIdFromValue(value));
             assertEquals(value, dict.getValueFromId(id));
@@ -892,7 +884,7 @@ public class TrieDictionaryForestTest {
             for (String s : notFound) {
                 try {
                     int nullId = dict.getIdFromValue(s);
-                    System.out.println("null value id:" + nullId);
+                    //System.out.println("null value id:" + nullId);
                     fail("For not found value '" + s + "', IllegalArgumentException is expected");
                 } catch (IllegalArgumentException e) {
                     // good
@@ -938,8 +930,9 @@ public class TrieDictionaryForestTest {
     public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String>
strs, int baseId, int treeSize) {
         TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new
StringBytesConverter(), baseId);
         b.setMaxTrieTreeSize(treeSize);
-        for (String s : strs)
+        for (String s : strs) {
             b.addValue(s);
+        }
         return b;
     }
 
@@ -950,7 +943,7 @@ public class TrieDictionaryForestTest {
             b.addValue(strs.next());
         return b;
     }
-    
+
     private static class RandomStrings implements Iterable<String> {
         final private int size;
 
@@ -1039,7 +1032,7 @@ public class TrieDictionaryForestTest {
                 trieBuilder.addValue(s);
         }
         TrieDictionary<String> trie = trieBuilder.build(baseId);
-        
+
         assertEquals(trie.getMaxId(), dict.getMaxId());
         assertEquals(trie.getMinId(), dict.getMinId());
         assertEquals(trie.getSize(), dict.getSize());


Mime
View raw message