hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Gopal V (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (HIVE-19743) hive is not pushing predicate down to HBaseStorageHandler if hive key mapped with hbase is stored as varchar
Date Wed, 30 May 2018 21:55:00 GMT

    [ https://issues.apache.org/jira/browse/HIVE-19743?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16495740#comment-16495740
] 

Gopal V commented on HIVE-19743:
--------------------------------

The filter in the plan is generated as

UDFToString(col) = <constant>

instead of 

col = cast(<constant> as varchar(n))

> hive is not pushing predicate down to HBaseStorageHandler if hive key mapped with hbase is stored as varchar
> ------------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-19743
>                 URL: https://issues.apache.org/jira/browse/HIVE-19743
>             Project: Hive
>          Issue Type: Bug
>          Components: HBase Handler, Hive
>    Affects Versions: 2.1.0
>         Environment: java8,centos7
>            Reporter: Rajkumar Singh
>            Priority: Major
>
> Steps to Reproduce:
> {code}
> //hbase table
> create 'mytable', 'cf'
> put 'mytable', 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4', 'cf:message', 'hello world'
> put 'mytable', 'ABCDEF1|GHIJK1|ijj123kl-mn4o-4pq5-678r-st90123u0v41', 'cf:foo', 0x0
> // hive table with key stored as varchar
> show create table hbase_table_4;
> +-----------------------------------------------------------+--+
> |                      createtab_stmt                       |
> +-----------------------------------------------------------+--+
> | CREATE EXTERNAL TABLE `hbase_table_4`(                    |
> |   `hbase_key` varchar(80) COMMENT 'from deserializer',    |
> |   `value` string COMMENT 'from deserializer',             |
> |   `value1` string COMMENT 'from deserializer')            |
> | ROW FORMAT SERDE                                          |
> |   'org.apache.hadoop.hive.hbase.HBaseSerDe'               |
> | STORED BY                                                 |
> |   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'      |
> | WITH SERDEPROPERTIES (                                    |
> |   'hbase.columns.mapping'=':key,cf:foo,cf:message',       |
> |   'serialization.format'='1')                             |
> | TBLPROPERTIES (                                           |
> |   'COLUMN_STATS_ACCURATE'='\{\"BASIC_STATS\":\"true\"}',   |
> |   'hbase.table.name'='mytable',                           |
> |   'numFiles'='0',                                         |
> |   'numRows'='0',                                          |
> |   'rawDataSize'='0',                                      |
> |   'totalSize'='0',                                        |
> |   'transient_lastDdlTime'='1527708430')                   |
> +-----------------------------------------------------------+--+
>  
> // hive table key stored as string
> CREATE EXTERNAL TABLE `hbase_table_5`(                    |
> |   `hbase_key` string COMMENT 'from deserializer',         |
> |   `value` string COMMENT 'from deserializer',             |
> |   `value1` string COMMENT 'from deserializer')            |
> | ROW FORMAT SERDE                                          |
> |   'org.apache.hadoop.hive.hbase.HBaseSerDe'               |
> | STORED BY                                                 |
> |   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'      |
> | WITH SERDEPROPERTIES (                                    |
> |   'hbase.columns.mapping'=':key,cf:foo,cf:message',       |
> |   'serialization.format'='1')                             |
> | TBLPROPERTIES (                                           |
> |   'COLUMN_STATS_ACCURATE'='\{\"BASIC_STATS\":\"true\"}',   |
> |   'hbase.table.name'='mytable',                           |
> |   'numFiles'='0',                                         |
> |   'numRows'='0',                                          |
> |   'rawDataSize'='0',                                      |
> |   'totalSize'='0',                                        |
> |   'transient_lastDdlTime'='1527708520')                   |
>  
> Explain Plan
>  explain select * from hbase_table_4 where hbase_key='ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4'
>  Stage-0                                                                                          |
> |   Fetch Operator                                                                                |
> |     limit:-1                                                                                    |
> |     Select Operator [SEL_2]                                                                     |
> |       Output:["_col0","_col1","_col2"]                                                          |
> |       Filter Operator [FIL_4]                                                                   |
> |         predicate:(UDFToString(hbase_key) = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4') |
> |         TableScan [TS_0]                                                                        |
> |           Output:["hbase_key","value","value1"] 
>  
> explain on table with key stored as string
> explain select * from hbase_table_5 where hbase_key='ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4';
>  Plan optimized by CBO.                  |
> |                                         |
> | Stage-0                                 |
> |   Fetch Operator                        |
> |     limit:-1                            |
> |     Select Operator [SEL_2]             |
> |       Output:["_col0","_col1","_col2"]  |
> |       TableScan [TS_0]                  |
> |         Output:["value","value1"] 
> {code}
>  
> The predicate is pushed down correctly on the table whose HBase row key is stored as string:
> {code}
> 2018-05-30 21:26:45,418 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(972)) - Original predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
> 2018-05-30 21:26:45,418 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(975)) - Pushed predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
> 2018-05-30 21:26:45,418 DEBUG [main]: ppd.PredicatePushDown (PredicatePushDown.java:transform(138)) - After PPD:
> TS[0]-SEL[2]-FS[3]
> {code}
>  whereas when the row key is stored as varchar, the predicate is not pushed down to the storage handler and falls back to a residual predicate:
> {code}
> 2018-05-30 21:29:29,303 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(972)) - Original predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
> 2018-05-30 21:29:29,303 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(980)) - Residual predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
> 2018-05-30 21:29:29,303 DEBUG [main]: ppd.PredicatePushDown (PredicatePushDown.java:transform(138)) - After PPD:
> TS[0]-FIL[4]-SEL[2]-FS[3]
> {code}
>  
>  
>  
>  
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message