kylin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From liy...@apache.org
Subject [5/8] kylin git commit: KYLIN-2283 replace old with new
Date Sun, 18 Dec 2016 01:09:43 GMT
KYLIN-2283 replace old with new


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f119a559
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f119a559
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f119a559

Branch: refs/heads/KYLIN-2283
Commit: f119a5590b3f8e0203a543f767fab0fbe674f717
Parents: a74140e
Author: Yang Li <liyang@apache.org>
Authored: Sat Dec 17 08:03:10 2016 +0800
Committer: Yang Li <liyang@apache.org>
Committed: Sun Dec 18 08:24:21 2016 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/job/DataGenTest.java  |  56 --
 .../java/org/apache/kylin/job/DeployUtil.java   |  27 +-
 .../apache/kylin/job/dataGen/ColumnConfig.java  |  80 ---
 .../kylin/job/dataGen/FactTableGenerator.java   | 696 -------------------
 .../org/apache/kylin/job/dataGen/GenConfig.java |  92 ---
 .../localmeta/data/data_gen_config.json         |  65 --
 6 files changed, 8 insertions(+), 1008 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java b/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java
deleted file mode 100644
index af4f9fb..0000000
--- a/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.kylin.common.util.LocalFileMetadataTestCase;
-import org.apache.kylin.job.dataGen.FactTableGenerator;
-import org.apache.kylin.metadata.MetadataManager;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- *
- */
-public class DataGenTest extends LocalFileMetadataTestCase {
-
-    @Before
-    public void before() throws Exception {
-        this.createTestMetadata();
-        MetadataManager.clearCache();
-    }
-
-    @After
-    public void after() throws Exception {
-        this.cleanupTestMetadata();
-    }
-
-    @Test
-    public void testBasics() throws Exception {
-        String content = FactTableGenerator.generate("test_kylin_cube_with_slr_ready", "10000", "1", null);// default  settings
-        //System.out.println(content);
-        assertTrue(content.contains("FP-non GTC"));
-        assertTrue(content.contains("ABIN"));
-
-        //DeployUtil.overrideFactTableData(content, "default.test_kylin_fact");
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java b/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java
index 23b3670..f65d2a4 100644
--- a/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java
+++ b/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java
@@ -33,20 +33,21 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.ResourceTool;
+import org.apache.kylin.common.util.HiveCmdBuilder;
 import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.cube.CubeDescManager;
 import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.job.dataGen.FactTableGenerator;
 import org.apache.kylin.job.streaming.StreamDataLoader;
 import org.apache.kylin.job.streaming.StreamingTableDataGenerator;
 import org.apache.kylin.metadata.MetadataManager;
 import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.DataModelDesc;
 import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TableRef;
 import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.source.datagen.ModelDataGenerator;
 import org.apache.kylin.source.hive.HiveClientFactory;
-import org.apache.kylin.common.util.HiveCmdBuilder;
 import org.apache.kylin.source.hive.IHiveClient;
 import org.apache.kylin.source.kafka.TimedJsonStreamParser;
 import org.apache.maven.model.Model;
@@ -131,16 +132,15 @@ public class DeployUtil {
 
     public static void prepareTestDataForNormalCubes(String cubeName) throws Exception {
 
-        String factTableName = TABLE_KYLIN_FACT.toUpperCase();
-        String content = null;
-
         boolean buildCubeUsingProvidedData = Boolean.parseBoolean(System.getProperty("buildCubeUsingProvidedData"));
         if (!buildCubeUsingProvidedData) {
             System.out.println("build cube with random dataset");
+            
             // data is generated according to cube descriptor and saved in resource store
-            content = FactTableGenerator.generate(cubeName, "10000", "0.6", null);
-            assert content != null;
-            overrideFactTableData(content, factTableName);
+            MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
+            DataModelDesc model = mgr.getDataModelDesc("test_kylin_inner_join_model_desc");
+            ModelDataGenerator gen = new ModelDataGenerator(model, 1000);
+            gen.generate();
         } else {
             System.out.println("build normal cubes with provided dataset");
         }
@@ -168,17 +168,6 @@ public class DeployUtil {
         appendFactTableData(sb.toString(), cubeInstance.getRootFactTable());
     }
 
-    public static void overrideFactTableData(String factTableContent, String factTableName) throws IOException {
-        // Write to resource store
-        ResourceStore store = ResourceStore.getStore(config());
-
-        InputStream in = new ByteArrayInputStream(factTableContent.getBytes("UTF-8"));
-        String factTablePath = "/data/" + factTableName + ".csv";
-        store.deleteResource(factTablePath);
-        store.putResource(factTablePath, in, System.currentTimeMillis());
-        in.close();
-    }
-
     public static void appendFactTableData(String factTableContent, String factTableName) throws IOException {
         // Write to resource store
         ResourceStore store = ResourceStore.getStore(config());

http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
deleted file mode 100644
index 5e1c09f..0000000
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.dataGen;
-
-import java.util.ArrayList;
-
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- */
-@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE)
-public class ColumnConfig {
-    @JsonProperty("columnName")
-    private String columnName;
-    @JsonProperty("valueSet")
-    private ArrayList<String> valueSet;
-    @JsonProperty("exclusive")
-    private boolean exclusive;
-    @JsonProperty("asRange")
-    private boolean asRange;
-    @JsonProperty("differentiateByDateBoundary")
-    private boolean differentiateByDateBoundary;
-
-    public boolean isAsRange() {
-        return asRange;
-    }
-
-    public void setAsRange(boolean asRange) {
-        this.asRange = asRange;
-    }
-
-    public boolean isExclusive() {
-        return exclusive;
-    }
-
-    public void setExclusive(boolean exclusive) {
-        this.exclusive = exclusive;
-    }
-
-    public String getColumnName() {
-        return columnName;
-    }
-
-    public void setColumnName(String columnName) {
-        this.columnName = columnName;
-    }
-
-    public ArrayList<String> getValueSet() {
-        return valueSet;
-    }
-
-    public void setValueSet(ArrayList<String> valueSet) {
-        this.valueSet = valueSet;
-    }
-
-    public boolean isDifferentiateByDateBoundary() {
-        return differentiateByDateBoundary;
-    }
-
-    public void setDifferentiateByDateBoundary(boolean differentiateByDateBoundary) {
-        this.differentiateByDateBoundary = differentiateByDateBoundary;
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
deleted file mode 100644
index 011035b..0000000
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java
+++ /dev/null
@@ -1,696 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.dataGen;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.persistence.ResourceStore;
-import org.apache.kylin.common.util.Array;
-import org.apache.kylin.cube.CubeInstance;
-import org.apache.kylin.cube.CubeManager;
-import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.cube.model.DimensionDesc;
-import org.apache.kylin.metadata.MetadataManager;
-import org.apache.kylin.metadata.datatype.DataType;
-import org.apache.kylin.metadata.model.ColumnDesc;
-import org.apache.kylin.metadata.model.JoinDesc;
-import org.apache.kylin.metadata.model.MeasureDesc;
-import org.apache.kylin.metadata.model.TblColRef;
-
-import com.google.common.collect.Lists;
-
-/**
- */
-public class FactTableGenerator {
-    CubeInstance cube = null;
-    CubeDesc desc = null;
-    ResourceStore store = null;
-    String factTableName = null;
-
-    GenConfig genConf = null;
-
-    Random r = null;
-
-    String cubeName;
-    long randomSeed;
-    int rowCount;
-    int unlinkableRowCount;
-    int unlinkableRowCountMax;
-    double conflictRatio;
-    double linkableRatio;
-
-    long differentiateBoundary = -1;
-    List<Integer> differentiateColumns = Lists.newArrayList();
-
-    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
-
-    // the names of lookup table columns which is in relation with fact
-    // table(appear as fk in fact table)
-    TreeMap<String, LinkedList<String>> lookupTableKeys = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
-
-    // possible values of lookupTableKeys, extracted from existing lookup tables.
-    // The key is in the format of tablename/columnname
-    TreeMap<String, ArrayList<String>> feasibleValues = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
-
-    // lookup table name -> sets of all composite keys
-    TreeMap<String, HashSet<Array<String>>> lookupTableCompositeKeyValues = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
-
-    private void init(String cubeName, int rowCount, double conflictRaio, double linkableRatio, long randomSeed) {
-        this.rowCount = rowCount;
-        this.conflictRatio = conflictRaio;
-        this.cubeName = cubeName;
-        this.randomSeed = randomSeed;
-        this.linkableRatio = linkableRatio;
-
-        this.unlinkableRowCountMax = (int) (this.rowCount * (1 - linkableRatio));
-        this.unlinkableRowCount = 0;
-
-        r = new Random(randomSeed);
-
-        KylinConfig config = KylinConfig.getInstanceFromEnv();
-        cube = CubeManager.getInstance(config).getCube(cubeName);
-        desc = cube.getDescriptor();
-        factTableName = cube.getRootFactTable();
-        store = ResourceStore.getStore(config);
-    }
-
-    /*
-     * users can specify the value preference for each column
-     */
-    private void loadConfig() {
-        try {
-            InputStream configStream = store.getResource("/data/data_gen_config.json").inputStream;
-            this.genConf = GenConfig.loadConfig(configStream);
-
-            if (configStream != null)
-                configStream.close();
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-    }
-
-    private void loadLookupTableValues(String lookupTableName, LinkedList<String> columnNames, int distinctRowCount) throws Exception {
-        KylinConfig config = KylinConfig.getInstanceFromEnv();
-
-        // only deal with composite keys
-        if (columnNames.size() > 1 && !lookupTableCompositeKeyValues.containsKey(lookupTableName)) {
-            lookupTableCompositeKeyValues.put(lookupTableName, new HashSet<Array<String>>());
-        }
-
-        InputStream tableStream = null;
-        BufferedReader tableReader = null;
-        try {
-            TreeMap<String, Integer> zeroBasedInice = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
-            for (String columnName : columnNames) {
-                ColumnDesc cDesc = MetadataManager.getInstance(config).getTableDesc(lookupTableName).findColumnByName(columnName);
-                zeroBasedInice.put(columnName, cDesc.getZeroBasedIndex());
-            }
-
-            String path = "/data/" + lookupTableName + ".csv";
-            tableStream = store.getResource(path).inputStream;
-            tableReader = new BufferedReader(new InputStreamReader(tableStream));
-            tableReader.mark(0);
-            int rowCount = 0;
-            int curRowNum = 0;
-            String curRow;
-
-            while (tableReader.readLine() != null)
-                rowCount++;
-
-            HashSet<Integer> rows = new HashSet<Integer>();
-            distinctRowCount = (distinctRowCount < rowCount) ? distinctRowCount : rowCount;
-            while (rows.size() < distinctRowCount) {
-                rows.add(r.nextInt(rowCount));
-            }
-
-            // reopen the stream
-            tableReader.close();
-            tableStream.close();
-            tableStream = null;
-            tableReader = null;
-
-            tableStream = store.getResource(path).inputStream;
-            tableReader = new BufferedReader(new InputStreamReader(tableStream));
-
-            while ((curRow = tableReader.readLine()) != null) {
-                if (rows.contains(curRowNum)) {
-                    String[] tokens = curRow.split(",");
-
-                    String[] comboKeys = null;
-                    int index = 0;
-                    if (columnNames.size() > 1)
-                        comboKeys = new String[columnNames.size()];
-
-                    for (String columnName : columnNames) {
-                        int zeroBasedIndex = zeroBasedInice.get(columnName);
-                        if (!feasibleValues.containsKey(lookupTableName + "/" + columnName))
-                            feasibleValues.put(lookupTableName + "/" + columnName, new ArrayList<String>());
-                        feasibleValues.get(lookupTableName + "/" + columnName).add(tokens[zeroBasedIndex]);
-
-                        if (columnNames.size() > 1) {
-                            comboKeys[index] = tokens[zeroBasedIndex];
-                            index++;
-                        }
-                    }
-
-                    if (columnNames.size() > 1) {
-                        Array<String> wrap = new Array<String>(comboKeys);
-                        if (lookupTableCompositeKeyValues.get(lookupTableName).contains(wrap)) {
-                            throw new Exception("The composite key already exist in the lookup table");
-                        }
-                        lookupTableCompositeKeyValues.get(lookupTableName).add(wrap);
-                    }
-                }
-                curRowNum++;
-            }
-
-        } catch (IOException e) {
-            e.printStackTrace();
-            System.exit(1);
-        } finally {
-            if (tableStream != null)
-                tableStream.close();
-            if (tableReader != null)
-                tableReader.close();
-        }
-    }
-
-    // prepare the candidate values for each joined column
-    private void prepare() throws Exception {
-        // load config
-        loadConfig();
-
-        int index = 0;
-        for (ColumnDesc cDesc : MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()).getTableDesc(factTableName).getColumns()) {
-            ColumnConfig cConfig = genConf.getColumnConfigByName(cDesc.getName());
-
-            if (cConfig != null && cConfig.isDifferentiateByDateBoundary()) {
-                if (!cDesc.getType().isStringFamily()) {
-                    throw new IllegalStateException("differentiateByDateBoundary only applies to text types, actual:" + cDesc.getType());
-                }
-                if (genConf.getDifferentiateBoundary() == null) {
-                    throw new IllegalStateException("differentiateBoundary not provided");
-                }
-                if (differentiateBoundary == -1) {
-                    differentiateBoundary = format.parse(genConf.getDifferentiateBoundary()).getTime();
-                }
-                differentiateColumns.add(index);
-            }
-            index++;
-        }
-
-        TreeSet<String> factTableColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
-
-        for (DimensionDesc dim : desc.getDimensions()) {
-            for (TblColRef col : dim.getColumnRefs()) {
-                if (col.getTable().equals(factTableName))
-                    factTableColumns.add(col.getName());
-            }
-
-            JoinDesc join = dim.getJoin();
-            if (join != null) {
-                String lookupTable = dim.getTableRef().getTableIdentity();
-                for (String column : dropAlias(join.getPrimaryKey())) {
-                    if (!lookupTableKeys.containsKey(lookupTable)) {
-                        lookupTableKeys.put(lookupTable, new LinkedList<String>());
-                    }
-
-                    if (!lookupTableKeys.get(lookupTable).contains(column))
-                        lookupTableKeys.get(lookupTable).add(column);
-                }
-            }
-        }
-
-        int distinctRowCount = (int) (this.rowCount / this.conflictRatio);
-        distinctRowCount = (distinctRowCount == 0) ? 1 : distinctRowCount;
-        // lookup tables
-        for (String lookupTable : lookupTableKeys.keySet()) {
-            this.loadLookupTableValues(lookupTable, lookupTableKeys.get(lookupTable), distinctRowCount);
-        }
-    }
-
-    private List<DimensionDesc> getSortedDimentsionDescs() {
-        List<DimensionDesc> dimensions = desc.getDimensions();
-        Collections.sort(dimensions, new Comparator<DimensionDesc>() {
-            @Override
-            public int compare(DimensionDesc o1, DimensionDesc o2) {
-                JoinDesc j1 = o2.getJoin();
-                JoinDesc j2 = o1.getJoin();
-                return Integer.valueOf(j1 != null ? j1.getPrimaryKey().length : 0).compareTo(j2 != null ? j2.getPrimaryKey().length : 0);
-            }
-        });
-        return dimensions;
-    }
-
-    /**
-     * Generate the fact table and return it as text
-     *
-     * @return
-     * @throws Exception
-     */
-    private String cookData() throws Exception {
-        // the columns on the fact table can be classified into three groups:
-        // 1. foreign keys
-        TreeMap<String, String> factTableCol2LookupCol = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
-        // 2. metrics or directly used dimensions
-        TreeSet<String> usedCols = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
-        // 3. others, not referenced anywhere
-
-        TreeMap<String, String> lookupCol2factTableCol = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
-
-        // find fact table columns in fks
-        List<DimensionDesc> dimensions = getSortedDimentsionDescs();
-        for (DimensionDesc dim : dimensions) {
-            JoinDesc jDesc = dim.getJoin();
-            if (jDesc != null) {
-                String[] fks = dropAlias(jDesc.getForeignKey());
-                String[] pks = dropAlias(jDesc.getPrimaryKey());
-                int num = fks.length;
-                for (int i = 0; i < num; ++i) {
-                    String value = dim.getTableRef().getTableIdentity() + "/" + pks[i];
-
-                    lookupCol2factTableCol.put(value, fks[i]);
-
-                    if (factTableCol2LookupCol.containsKey(fks[i])) {
-                        if (!factTableCol2LookupCol.get(fks[i]).equals(value)) {
-                            System.out.println("Warning: Disambiguation on the mapping of column " + fks[i] + ", " + factTableCol2LookupCol.get(fks[i]) + "(chosen) or " + value);
-                            continue;
-                        }
-                    }
-                    factTableCol2LookupCol.put(fks[i], value);
-                }
-            }
-            //else, deal with it in next roung
-        }
-
-        // find fact table columns in direct dimension
-        // DO NOT merge this with the previous loop
-        for (DimensionDesc dim : dimensions) {
-            JoinDesc jDesc = dim.getJoin();
-            if (jDesc == null) {
-                // column on fact table used directly as a dimension
-                String aColumn = dim.getColumn();
-                if (!factTableCol2LookupCol.containsKey(aColumn))
-                    usedCols.add(aColumn);
-            }
-        }
-
-        // find fact table columns in measures
-        for (MeasureDesc mDesc : desc.getMeasures()) {
-            List<TblColRef> pcols = mDesc.getFunction().getParameter().getColRefs();
-            if (pcols != null) {
-                for (TblColRef col : pcols) {
-                    if (!factTableCol2LookupCol.containsKey(col.getName()))
-                        usedCols.add(col.getName());
-                }
-            }
-        }
-
-        return createTable(this.rowCount, factTableCol2LookupCol, lookupCol2factTableCol, usedCols);
-    }
-
-    private String[] dropAlias(String[] aliasDotCol) {
-        String[] result = new String[aliasDotCol.length];
-        for (int i = 0; i < aliasDotCol.length; i++) {
-            String str = aliasDotCol[i];
-            int cut = str.lastIndexOf('.');
-            if (cut >= 0) {
-                str = str.substring(cut + 1);
-            }
-            result[i] = str;
-        }
-        return result;
-    }
-
-    private String normToTwoDigits(int v) {
-        if (v < 10)
-            return "0" + v;
-        else
-            return Integer.toString(v);
-    }
-
-    private String randomPick(ArrayList<String> candidates) {
-        int index = r.nextInt(candidates.size());
-        return candidates.get(index);
-    }
-
-    private String createRandomCell(ColumnDesc cDesc, ArrayList<String> range) throws Exception {
-        DataType type = cDesc.getType();
-        if (type.isStringFamily()) {
-            throw new Exception("Can't handle range values for string");
-
-        } else if (type.isIntegerFamily()) {
-            int low = Integer.parseInt(range.get(0));
-            int high = Integer.parseInt(range.get(1));
-            return Integer.toString(r.nextInt(high - low) + low);
-
-        } else if (type.isDouble()) {
-            double low = Double.parseDouble(range.get(0));
-            double high = Double.parseDouble(range.get(1));
-            return String.format("%.4f", r.nextDouble() * (high - low) + low);
-
-        } else if (type.isFloat()) {
-            float low = Float.parseFloat(range.get(0));
-            float high = Float.parseFloat(range.get(1));
-            return String.format("%.4f", r.nextFloat() * (high - low) + low);
-
-        } else if (type.isDecimal()) {
-            double low = Double.parseDouble(range.get(0));
-            double high = Double.parseDouble(range.get(1));
-            return String.format("%.4f", r.nextDouble() * (high - low) + low);
-
-        } else if (type.isDateTimeFamily()) {
-            if (!type.isDate()) {
-                throw new RuntimeException("Does not support " + type);
-            }
-
-            Date start = format.parse(range.get(0));
-            Date end = format.parse(range.get(1));
-            long diff = end.getTime() - start.getTime();
-            Date temp = new Date(start.getTime() + (long) (diff * r.nextDouble()));
-            Calendar cal = Calendar.getInstance();
-            cal.setTime(temp);
-            // first day
-            cal.set(Calendar.DAY_OF_WEEK, cal.getFirstDayOfWeek());
-
-            return cal.get(Calendar.YEAR) + "-" + normToTwoDigits(cal.get(Calendar.MONTH) + 1) + "-" + normToTwoDigits(cal.get(Calendar.DAY_OF_MONTH));
-        } else {
-            System.out.println("The data type " + type + "is not recognized");
-            System.exit(1);
-        }
-        return null;
-    }
-
-    private String createRandomCell(ColumnDesc cDesc) {
-        DataType type =cDesc.getType();
-        String s = type.getName();
-        if (s.equals("char") || s.equals("varchar")) {
-            StringBuilder sb = new StringBuilder();
-            int len = Math.min(type.getPrecision(), 3);
-            for (int i = 0; i < len; i++) {
-                sb.append((char) ('a' + r.nextInt(10)));  // cardinality at most 10x10x10
-            }
-            return sb.toString();
-        } else if (s.equals("bigint") || s.equals("int") || s.equals("tinyint") || s.equals("smallint")) {
-            return Integer.toString(r.nextInt(128));
-        } else if (s.equals("double")) {
-            return String.format("%.4f", r.nextDouble() * 100);
-        } else if (s.equals("float")) {
-            return String.format("%.4f", r.nextFloat() * 100);
-        } else if (s.equals("decimal")) {
-            return String.format("%.4f", r.nextDouble() * 100);
-        } else if (s.equals("date")) {
-            long date20131231 = 61349312153265L;
-            long date20010101 = 60939158400000L;
-            long diff = date20131231 - date20010101;
-            Date temp = new Date(date20010101 + (long) (diff * r.nextDouble()));
-            Calendar cal = Calendar.getInstance();
-            cal.setTime(temp);
-            // first day
-            cal.set(Calendar.DAY_OF_WEEK, cal.getFirstDayOfWeek());
-
-            return cal.get(Calendar.YEAR) + "-" + normToTwoDigits(cal.get(Calendar.MONTH) + 1) + "-" + normToTwoDigits(cal.get(Calendar.DAY_OF_MONTH));
-        } else {
-            System.out.println("The data type " + type + "is not recognized");
-            System.exit(1);
-        }
-        return null;
-    }
-
-    private String createDefaultsCell(String type) {
-        String s = type.toLowerCase();
-        if (s.equals("string") || s.equals("char") || s.equals("varchar")) {
-            return "abcde";
-        } else if (s.equals("bigint") || s.equals("int") || s.equals("tinyint") || s.equals("smallint")) {
-            return "0";
-        } else if (s.equals("double")) {
-            return "0";
-        } else if (s.equals("float")) {
-            return "0";
-        } else if (s.equals("decimal")) {
-            return "0";
-        } else if (s.equals("date")) {
-            return "1970-01-01";
-        } else {
-            System.out.println("The data type " + type + "is not recognized");
-            System.exit(1);
-        }
-        return null;
-    }
-
-    private void printColumnMappings(TreeMap<String, String> factTableCol2LookupCol, TreeSet<String> usedCols, TreeSet<String> defaultColumns) {
-
-        System.out.println("=======================================================================");
-        System.out.format("%-30s %s", "FACT_TABLE_COLUMN", "MAPPING");
-        System.out.println();
-        System.out.println();
-        for (Map.Entry<String, String> entry : factTableCol2LookupCol.entrySet()) {
-            System.out.format("%-30s %s", entry.getKey(), entry.getValue());
-            System.out.println();
-        }
-        for (String key : usedCols) {
-            System.out.format("%-30s %s", key, "Random Values");
-            System.out.println();
-        }
-        for (String key : defaultColumns) {
-            System.out.format("%-30s %s", key, "Default Values");
-            System.out.println();
-        }
-        System.out.println("=======================================================================");
-
-        System.out.println("Parameters:");
-        System.out.println();
-        System.out.println("CubeName:        " + cubeName);
-        System.out.println("RowCount:        " + rowCount);
-        System.out.println("ConflictRatio:   " + conflictRatio);
-        System.out.println("LinkableRatio:   " + linkableRatio);
-        System.out.println("Seed:            " + randomSeed);
-        System.out.println();
-        System.out.println("The number of actual unlinkable fact rows is: " + this.unlinkableRowCount);
-        System.out.println("You can vary the above parameters to generate different datasets.");
-        System.out.println();
-    }
-
-    // Any row in the column must finally appear in the flatten big table.
-    // for single-column joins the generated row is guaranteed to have a match
-    // in lookup table
-    // for composite keys we'll need an extra check
-    private boolean matchAllCompositeKeys(TreeMap<String, String> lookupCol2FactTableCol, LinkedList<String> columnValues) {
-        KylinConfig config = KylinConfig.getInstanceFromEnv();
-
-        for (String lookupTable : lookupTableKeys.keySet()) {
-            if (lookupTableKeys.get(lookupTable).size() == 1)
-                continue;
-
-            String[] comboKey = new String[lookupTableKeys.get(lookupTable).size()];
-            int index = 0;
-            for (String column : lookupTableKeys.get(lookupTable)) {
-                String key = lookupTable + "/" + column;
-                String factTableCol = lookupCol2FactTableCol.get(key);
-                int cardinal = MetadataManager.getInstance(config).getTableDesc(factTableName).findColumnByName(factTableCol).getZeroBasedIndex();
-                comboKey[index] = columnValues.get(cardinal);
-
-                index++;
-            }
-            Array<String> wrap = new Array<String>(comboKey);
-            if (!lookupTableCompositeKeyValues.get(lookupTable).contains(wrap)) {
-                // System.out.println("Try " + wrap + " Failed, continue...");
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private String createCell(ColumnDesc cDesc) throws Exception {
-        ColumnConfig cConfig = null;
-
-        if ((cConfig = genConf.getColumnConfigByName(cDesc.getName())) == null) {
-            // if the column is not configured, use random values
-            return (createRandomCell(cDesc));
-
-        } else {
-            // the column has a configuration
-            if (!cConfig.isAsRange() && !cConfig.isExclusive() && r.nextBoolean()) {
-                // if the column still allows random values
-                return (createRandomCell(cDesc));
-
-            } else {
-                // use specified values
-                ArrayList<String> valueSet = cConfig.getValueSet();
-                if (valueSet == null || valueSet.size() == 0)
-                    throw new Exception("Did you forget to specify value set for " + cDesc.getName());
-
-                if (!cConfig.isAsRange()) {
-                    return (randomPick(valueSet));
-                } else {
-                    if (valueSet.size() != 2)
-                        throw new Exception("Only two values can be set for range values, the column: " + cDesc.getName());
-
-                    return (createRandomCell(cDesc, valueSet));
-                }
-            }
-
-        }
-    }
-
-    private LinkedList<String> createRow(TreeMap<String, String> factTableCol2LookupCol, TreeSet<String> usedCols, TreeSet<String> defaultColumns) throws Exception {
-        LinkedList<String> columnValues = new LinkedList<String>();
-
-        long currentRowTime = -1;
-
-        for (TblColRef col : cube.getModel().getRootFactTable().getColumns()) {
-
-            String colName = col.getName();
-
-            if (factTableCol2LookupCol.containsKey(colName)) {
-
-                // if the current column is a fk column in fact table
-                ArrayList<String> candidates = this.feasibleValues.get(factTableCol2LookupCol.get(colName));
-
-                columnValues.add(candidates.get(r.nextInt(candidates.size())));
-            } else if (usedCols.contains(colName)) {
-                // if the current column is a metric or dimension column in fact table
-                columnValues.add(createCell(col.getColumnDesc()));
-            } else {
-
-                // otherwise this column is not useful in OLAP
-                columnValues.add(createDefaultsCell(col.getColumnDesc().getTypeName()));
-                defaultColumns.add(colName);
-            }
-
-            if (col.equals(cube.getModel().getPartitionDesc().getPartitionDateColumnRef())) {
-                currentRowTime = format.parse(columnValues.get(columnValues.size() - 1)).getTime();
-            }
-        }
-
-        for (Integer index : differentiateColumns) {
-            if (r.nextBoolean()) {//only change half of data
-                if (currentRowTime >= differentiateBoundary) {
-                    columnValues.set(index, columnValues.get(index) + "_B");
-                } else {
-                    columnValues.set(index, columnValues.get(index) + "_A");
-                }
-            }
-        }
-
-        return columnValues;
-    }
-
-    /**
-     * return the text of table contents(one line one row)
-     *
-     * @param rowCount
-     * @param factTableCol2LookupCol
-     * @param lookupCol2FactTableCol
-     * @param usedCols
-     * @return
-     * @throws Exception
-     */
-    private String createTable(int rowCount, TreeMap<String, String> factTableCol2LookupCol, TreeMap<String, String> lookupCol2FactTableCol, TreeSet<String> usedCols) throws Exception {
-        try {
-            TreeSet<String> defaultColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
-
-            StringBuffer sb = new StringBuffer();
-            for (int i = 0; i < rowCount;) {
-
-                LinkedList<String> columnValues = createRow(factTableCol2LookupCol, usedCols, defaultColumns);
-
-                if (!matchAllCompositeKeys(lookupCol2FactTableCol, columnValues)) {
-                    if (unlinkableRowCount < unlinkableRowCountMax) {
-                        unlinkableRowCount++;
-                    } else {
-                        continue;
-                    }
-                }
-
-                for (String c : columnValues)
-                    sb.append(c + ",");
-                sb.deleteCharAt(sb.length() - 1);
-                sb.append(System.getProperty("line.separator"));
-
-                i++;
-
-                // System.out.println("Just generated the " + i + "th record");
-            }
-
-            printColumnMappings(factTableCol2LookupCol, usedCols, defaultColumns);
-
-            return sb.toString();
-
-        } catch (IOException e) {
-            e.printStackTrace();
-            System.exit(1);
-        }
-
-        return null;
-    }
-
-    /**
-     * Randomly create a fact table and return the table content
-     *
-     * @param cubeName      name of the cube
-     * @param rowCount      expected row count generated
-     * @param linkableRatio the percentage of fact table rows that can be linked with all
-     *                      lookup table by INNER join
-     * @param randomSeed    random seed
-     */
-    public static String generate(String cubeName, String rowCount, String linkableRatio, String randomSeed) throws Exception {
-
-        if (rowCount == null)
-            rowCount = "10000";
-        if (linkableRatio == null)
-            linkableRatio = "0.6";
-
-        //if (randomSeed == null)
-        // don't give it value
-
-        // String conflictRatio = "5";//this parameter do not allow configuring
-        // any more
-
-        FactTableGenerator generator = new FactTableGenerator();
-        long seed;
-        if (randomSeed != null) {
-            seed = Long.parseLong(randomSeed);
-        } else {
-            Random r = new Random();
-            seed = r.nextLong();
-        }
-
-        generator.init(cubeName, Integer.parseInt(rowCount), 5, Double.parseDouble(linkableRatio), seed);
-        generator.prepare();
-        return generator.cookData();
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
----------------------------------------------------------------------
diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
deleted file mode 100644
index 5204d2a..0000000
--- a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.job.dataGen;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-
-import org.apache.kylin.common.util.JsonUtil;
-
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.databind.JsonMappingException;
-
-/**
- */
-@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE)
-public class GenConfig {
-
-    @JsonProperty("columnConfigs")
-    private ArrayList<ColumnConfig> columnConfigs;
-
-    @JsonProperty("differentiateBoundary")
-    private String differentiateBoundary; //data before and after the provided date will be different, so that different segments will have different segments
-
-    private HashMap<String, ColumnConfig> cache = new HashMap<String, ColumnConfig>();
-
-    public String getDifferentiateBoundary() {
-        return differentiateBoundary;
-    }
-
-    public void setDifferentiateBoundary(String differentiateBoundary) {
-        this.differentiateBoundary = differentiateBoundary;
-    }
-
-    public ArrayList<ColumnConfig> getColumnConfigs() {
-        return columnConfigs;
-    }
-
-    public void setColumnConfigs(ArrayList<ColumnConfig> columnConfigs) {
-        this.columnConfigs = columnConfigs;
-    }
-
-    public ColumnConfig getColumnConfigByName(String columnName) {
-        columnName = columnName.toLowerCase();
-
-        if (cache.containsKey(columnName))
-            return cache.get(columnName);
-
-        for (ColumnConfig cConfig : columnConfigs) {
-            if (cConfig.getColumnName().toLowerCase().equals(columnName)) {
-                cache.put(columnName, cConfig);
-                return cConfig;
-            }
-        }
-        cache.put(columnName, null);
-        return null;
-    }
-
-    public static GenConfig loadConfig(InputStream stream) {
-        try {
-            GenConfig config = JsonUtil.readValue(stream, GenConfig.class);
-            return config;
-        } catch (JsonMappingException e) {
-            e.printStackTrace();
-        } catch (JsonParseException e) {
-            e.printStackTrace();
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-
-        return null;
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/examples/test_case_data/localmeta/data/data_gen_config.json
----------------------------------------------------------------------
diff --git a/examples/test_case_data/localmeta/data/data_gen_config.json b/examples/test_case_data/localmeta/data/data_gen_config.json
deleted file mode 100644
index 15b3fd0..0000000
--- a/examples/test_case_data/localmeta/data/data_gen_config.json
+++ /dev/null
@@ -1,65 +0,0 @@
-{
-  "differentiateBoundary": "2013-07-01",
-  "columnConfigs": [
-    {
-      "columnName": "lstg_format_name",
-      "valueSet": [
-        "FP-GTC",
-        "FP-non GTC",
-        "ABIN",
-        "Auction",
-        "Others"
-      ],
-      "exclusive": true,
-      "differentiateByDateBoundary": true
-    },
-    {
-      "columnName": "BUYER_COUNTRY",
-      "valueSet": [
-        "CN",
-        "DE",
-        "FR",
-        "JP",
-        "UK",
-        "US"
-      ],
-      "exclusive": true
-    },
-    {
-      "columnName": "SELLER_COUNTRY",
-      "valueSet": [
-        "CN",
-        "DE",
-        "FR",
-        "JP",
-        "UK",
-        "US"
-      ],
-      "exclusive": true
-    },
-    {
-      "columnName": "SELLER_ID",
-      "valueSet": [
-        "10000000",
-        "10001000"
-      ],
-      "asRange": true
-    },
-    {
-      "columnName": "ITEM_COUNT",
-      "valueSet": [
-        "0",
-        "2000000"
-      ],
-      "asRange": true
-    },
-    {
-      "columnName": "PRICE",
-      "valueSet": [
-        "0",
-        "1000"
-      ],
-      "asRange": true
-    }
-  ]
-}


Mime
View raw message