kylin-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ShaoFeng Shi <shaofeng...@apache.org>
Subject Re: 关于kylin查询问题咨询
Date Tue, 14 Feb 2017 14:35:47 GMT
Kaisen made several performance improvement on the bitmap measure; You can
check http://mt.sohu.com/20170113/n478613681.shtml if haven't.

BTW, these improvements will be included in the 2.0 release;

2017-02-14 11:05 GMT+08:00 仇同心 <qiutongxin@jd.com>:

> 大家好:
>
> 目前在使用中遇到一个kylin查询慢的场景:
>
> Cube  设计如下:
>
> {
>
>   "uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
>
>   "last_modified": 1486997875953,
>
>   "version": "1.6.0",
>
>   "name": "insu_t",
>
>   "model_name": "insu_jdmall_model_test",
>
>   "description": "",
>
>   "null_string": null,
>
>   "dimensions": [
>
>     {
>
>       "name": "BRAND",
>
>       "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
>
>       "column": "BRAND_CD",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "DT",
>
>       "table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
>
>       "column": "DT",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "DIM.DIM_DAY_DERIVED",
>
>       "table": "DIM.DIM_DAY",
>
>       "column": null,
>
>       "derived": [
>
>         "DIM_DAY_NAME",
>
>         "DIM_WEEK_NAME",
>
>         "DIM_MONTH_NAME"
>
>       ]
>
>     },
>
>     {
>
>       "name": "FIRST",
>
>      "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
>       "column": "ITEM_FIRST_CATE_NAME",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "SECOND",
>
>       "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
>       "column": "ITEM_SECOND_CATE_NAME",
>
>       "derived": null
>
>     },
>
>     {
>
>       "name": "THIRD",
>
>       "table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
>
>       "column": "ITEM_THIRD_CATE_NAME",
>
>       "derived": null
>
>     }
>
>   ],
>
>   "measures": [
>
>     {
>
>       "name": "_COUNT_",
>
>       "function": {
>
>         "expression": "COUNT",
>
>         "parameter": {
>
>           "type": "constant",
>
>           "value": "1",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "bigint"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "QTTY",
>
>       "function": {
>
>         "expression": "SUM",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "SALE_QTTY",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "bigint"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "BEFORE",
>
>       "function": {
>
>         "expression": "SUM",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "BEFORE_PREFR_AMOUNT",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "decimal(25,4)"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "USER",
>
>       "function": {
>
>         "expression": "SUM",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "USER_ACTUAL_PAY_AMOUNT",
>
>           "next_parameter": null
>
>         },
>
>        "returntype": "decimal(25,4)"
>
>       },
>
>       "dependent_measure_ref": null
>
>     },
>
>     {
>
>       "name": "SALE",
>
>       "function": {
>
>         "expression": "COUNT_DISTINCT",
>
>         "parameter": {
>
>           "type": "column",
>
>           "value": "SALE_ORD_ID",
>
>           "next_parameter": null
>
>         },
>
>         "returntype": "bitmap"
>
>       },
>
>       "dependent_measure_ref": null
>
>     }
>
>   ],
>
>   "dictionaries": [],
>
>   "rowkey": {
>
>     "rowkey_columns": [
>
>       {
>
>         "column": "BRAND_CD",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "DT",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "ITEM_FIRST_CATE_NAME",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "ITEM_SECOND_CATE_NAME",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       },
>
>       {
>
>         "column": "ITEM_THIRD_CATE_NAME",
>
>         "encoding": "dict",
>
>         "isShardBy": false
>
>       }
>
>     ]
>
>   },
>
>   "hbase_mapping": {
>
>     "column_family": [
>
>       {
>
>         "name": "F1",
>
>         "columns": [
>
>           {
>
>             "qualifier": "M",
>
>             "measure_refs": [
>
>               "_COUNT_",
>
>               "QTTY",
>
>               "BEFORE",
>
>               "USER"
>
>             ]
>
>           }
>
>         ]
>
>       },
>
>       {
>
>         "name": "F2",
>
>         "columns": [
>
>           {
>
>             "qualifier": "M",
>
>             "measure_refs": [
>
>               "SALE"
>
>             ]
>
>           }
>
>         ]
>
>       }
>
>     ]
>
>   },
>
>   "aggregation_groups": [
>
>     {
>
>       "includes": [
>
>         "BRAND_CD",
>
>         "DT",
>
>         "ITEM_FIRST_CATE_NAME",
>
>         "ITEM_SECOND_CATE_NAME",
>
>         "ITEM_THIRD_CATE_NAME"
>
>       ],
>
>       "select_rule": {
>
>         "hierarchy_dims": [],
>
>         "mandatory_dims": [],
>
>         "joint_dims": []
>
>       }
>
>     }
>
>   ],
>
>   "signature": "Kl5sPTVN78bEYTGKoUOsWg==",
>
>   "notify_list": [],
>
>   "status_need_notify": [
>
>     "ERROR",
>
>     "DISCARDED",
>
>     "SUCCEED"
>
>   ],
>
>   "partition_date_start": 1483747200000,
>
>   "partition_date_end": 3153600000000,
>
>   "auto_merge_time_ranges": [
>
>     604800000,
>
>     2419200000
>
>   ],
>
>   "retention_range": 0,
>
>   "engine_type": 2,
>
>   "storage_type": 2,
>
>   "override_kylin_properties": {
>
>     "kylin.hbase.region.cut": "1"
>
>   }
>
> }
>
>
>
> 数据量是14天的数据,sale_ord_id的基数是1.5亿
>
> Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
>
> from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
>
> left join dim.dim_day b on a.dt = b.dim_day_txdate
>
> left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd =
> c.item_third_cate_id
>
> group by dt,item_second_cate_name;
>
>
>
> 这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
>
> dt,item_second_cate_name 都是normal维度,从结果看是count_distinct
> 度量导致的慢查询,请问这个有什么优化的建议吗?
>
>
>
> 谢谢!
>
>
>
>
>
>
>



-- 
Best regards,

Shaofeng Shi 史少锋

Mime
View raw message