hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Gopal V (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (HIVE-12808) Logical PPD: Push filter clauses through PTF into TS
Date Fri, 08 Jan 2016 00:24:40 GMT

     [ https://issues.apache.org/jira/browse/HIVE-12808?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Gopal V updated HIVE-12808:
---------------------------
    Description: 
Simplified repro case of [HCC #8880|https://community.hortonworks.com/questions/8880/hive-on-tez-pushdown-predicate-doesnt-work-in-part.html],
with the slow query showing the push-down miss. 

And the manually rewritten query to indicate the expected one.

Part of the problem could be the window range not being split apart for PPD, but the FIL is
not pushed down even if the rownum filter is removed.

{code}
create temporary table positions (regionid string, id bigint, deviceid string, ts string);

insert into positions values('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-02'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-02');


-- slow query
explain
WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC)
AS rownos
         FROM     positions ), 
latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
AND    id=1422792010 
AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432';

-- fast query
explain
WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC)
AS rownos
         FROM     positions 
         WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
         AND    id=1422792010 
         AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432'
),latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
;
{code}

  was:
Simplified repro case, with the slow query showing the push-down miss. 

And the manually rewritten query to indicate the expected one.

Part of the problem could be the window range not being split apart for PPD, but the FIL is
not pushed down even if the rownum filter is removed.

{code}
create temporary table positions (regionid string, id bigint, deviceid string, ts string);

insert into positions values('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-02'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-02');


-- slow query
explain
WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC)
AS rownos
         FROM     positions ), 
latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
AND    id=1422792010 
AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432';

-- fast query
explain
WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC)
AS rownos
         FROM     positions 
         WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
         AND    id=1422792010 
         AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432'
),latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
;
{code}


> Logical PPD: Push filter clauses through PTF into TS
> ----------------------------------------------------
>
>                 Key: HIVE-12808
>                 URL: https://issues.apache.org/jira/browse/HIVE-12808
>             Project: Hive
>          Issue Type: Bug
>          Components: Logical Optimizer
>    Affects Versions: 1.2.1, 2.0.0
>            Reporter: Gopal V
>            Assignee: Laljo John Pullokkaran
>
> Simplified repro case of [HCC #8880|https://community.hortonworks.com/questions/8880/hive-on-tez-pushdown-predicate-doesnt-work-in-part.html],
with the slow query showing the push-down miss. 
> And the manually rewritten query to indicate the expected one.
> Part of the problem could be the window range not being split apart for PPD, but the
FIL is not pushed down even if the rownum filter is removed.
> {code}
> create temporary table positions (regionid string, id bigint, deviceid string, ts string);
> insert into positions values('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-01'),
> ('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-01'),
> ('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-02'),
> ('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432',
'2016-01-02');
> -- slow query
> explain
> WITH t1 AS 
> ( 
>          SELECT   *, 
>                   Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts
DESC) AS rownos
>          FROM     positions ), 
> latestposition as ( 
>        SELECT * 
>        FROM   t1 
>        WHERE  rownos = 1) 
> SELECT * 
> FROM   latestposition 
> WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
> AND    id=1422792010 
> AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432';
> -- fast query
> explain
> WITH t1 AS 
> ( 
>          SELECT   *, 
>                   Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts
DESC) AS rownos
>          FROM     positions 
>          WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
>          AND    id=1422792010 
>          AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432'
> ),latestposition as ( 
>        SELECT * 
>        FROM   t1 
>        WHERE  rownos = 1) 
> SELECT * 
> FROM   latestposition 
> ;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message