drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-3188) Restrict the types of window frames that can be specified
Date Thu, 10 Sep 2015 17:41:45 GMT

    [ https://issues.apache.org/jira/browse/DRILL-3188?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14739213#comment-14739213
] 

ASF GitHub Bot commented on DRILL-3188:
---------------------------------------

Github user jinfengni commented on a diff in the pull request:

    https://github.com/apache/drill/pull/152#discussion_r39189463
  
    --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/TestWindowFunctions.java
---
    @@ -165,49 +258,182 @@ public void testWindowGroupByOnView() throws Exception {
     
       @Test // DRILL-3188
       public void testWindowFrameEquivalentToDefault() throws Exception {
    -    final String query1 = "explain plan for select sum(n_nationKey) over(partition by
n_nationKey order by n_nationKey) \n" +
    +    final String query1 = "select sum(n_nationKey) over(partition by n_nationKey order
by n_nationKey) as col\n" +
             "from cp.`tpch/nation.parquet` t \n" +
             "order by n_nationKey";
     
    -    final String query2 = "explain plan for select sum(n_nationKey) over(partition by
n_nationKey order by n_nationKey \n" +
    -        "range between unbounded preceding and current row) \n" +
    +    final String query2 = "select sum(n_nationKey) over(partition by n_nationKey order
by n_nationKey \n" +
    +        "range between unbounded preceding and current row) as col \n" +
             "from cp.`tpch/nation.parquet` t \n" +
             "order by n_nationKey";
     
    -    final String query3 = "explain plan for select sum(n_nationKey) over(partition by
n_nationKey \n" +
    -        "rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)" +
    +    final String query3 = "select sum(n_nationKey) over(partition by n_nationKey \n"
+
    +        "rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as col \n" +
             "from cp.`tpch/nation.parquet` t \n" +
             "order by n_nationKey";
     
    -    test(query1);
    -    test(query2);
    -    test(query3);
    +    // Validate the plan
    +    final String[] expectedPlan1 = {"Window.*partition \\{0\\} order by \\[0\\].*SUM\\(\\$0\\)",
    +        "Scan.*columns=\\[`n_nationKey`\\].*"};
    +    final String[] excludedPatterns1 = {"Scan.*columns=\\[`\\*`\\].*"};
    +    PlanTestBase.testPlanMatchingPatterns(query1, expectedPlan1, excludedPatterns1);
    +
    +    testBuilder()
    +        .sqlQuery(query1)
    +        .unOrdered()
    +        .baselineColumns("col")
    +        .baselineValues(0l)
    +        .baselineValues(1l)
    +        .baselineValues(2l)
    +        .baselineValues(3l)
    +        .baselineValues(4l)
    +        .baselineValues(5l)
    +        .baselineValues(6l)
    +        .baselineValues(7l)
    +        .baselineValues(8l)
    +        .baselineValues(9l)
    +        .baselineValues(10l)
    +        .baselineValues(11l)
    +        .baselineValues(12l)
    +        .baselineValues(13l)
    +        .baselineValues(14l)
    +        .baselineValues(15l)
    +        .baselineValues(16l)
    +        .baselineValues(17l)
    +        .baselineValues(18l)
    +        .baselineValues(19l)
    +        .baselineValues(20l)
    +        .baselineValues(21l)
    +        .baselineValues(22l)
    +        .baselineValues(23l)
    +        .baselineValues(24l)
    +        .build()
    +        .run();
    +
    +    final String[] expectedPlan2 = {"Window.*partition \\{0\\} order by \\[0\\].*SUM\\(\\$0\\)",
    +        "Scan.*columns=\\[`n_nationKey`\\].*"};
    +    final String[] excludedPatterns2 = {"Scan.*columns=\\[`\\*`\\].*"};
    +    PlanTestBase.testPlanMatchingPatterns(query2, expectedPlan2, excludedPatterns2);
    +
    +    testBuilder()
    +        .sqlQuery(query2)
    +        .unOrdered()
    +        .baselineColumns("col")
    +        .baselineValues(0l)
    +        .baselineValues(1l)
    +        .baselineValues(2l)
    +        .baselineValues(3l)
    +        .baselineValues(4l)
    +        .baselineValues(5l)
    +        .baselineValues(6l)
    +        .baselineValues(7l)
    +        .baselineValues(8l)
    +        .baselineValues(9l)
    +        .baselineValues(10l)
    +        .baselineValues(11l)
    +        .baselineValues(12l)
    +        .baselineValues(13l)
    +        .baselineValues(14l)
    +        .baselineValues(15l)
    +        .baselineValues(16l)
    +        .baselineValues(17l)
    +        .baselineValues(18l)
    +        .baselineValues(19l)
    +        .baselineValues(20l)
    +        .baselineValues(21l)
    +        .baselineValues(22l)
    +        .baselineValues(23l)
    +        .baselineValues(24l)
    +        .build()
    +        .run();
    +
    +    final String[] expectedPlan3 = {"Window.*partition \\{0\\}.*SUM\\(\\$0\\)",
    +        "Scan.*columns=\\[`n_nationKey`\\].*"};
    +    final String[] excludedPatterns3 = {"Scan.*columns=\\[`\\*`\\].*"};
    +    PlanTestBase.testPlanMatchingPatterns(query3, expectedPlan3, excludedPatterns3);
    +
    +    testBuilder()
    +        .sqlQuery(query3)
    +        .unOrdered()
    +        .baselineColumns("col")
    +        .baselineValues(0l)
    +        .baselineValues(1l)
    +        .baselineValues(2l)
    +        .baselineValues(3l)
    +        .baselineValues(4l)
    +        .baselineValues(5l)
    +        .baselineValues(6l)
    +        .baselineValues(7l)
    +        .baselineValues(8l)
    +        .baselineValues(9l)
    +        .baselineValues(10l)
    +        .baselineValues(11l)
    +        .baselineValues(12l)
    +        .baselineValues(13l)
    +        .baselineValues(14l)
    +        .baselineValues(15l)
    +        .baselineValues(16l)
    +        .baselineValues(17l)
    +        .baselineValues(18l)
    +        .baselineValues(19l)
    +        .baselineValues(20l)
    +        .baselineValues(21l)
    +        .baselineValues(22l)
    +        .baselineValues(23l)
    +        .baselineValues(24l)
    +        .build()
    +        .run();
       }
     
       @Test // DRILL-3204
       public void testWindowWithJoin() throws Exception {
    -    final String query = "select sum(t1.r_regionKey) over(partition by t1.r_regionKey)
 \n" +
    +    final String query = "select sum(t1.r_regionKey) over(partition by t1.r_regionKey)
as col \n" +
             "from cp.`tpch/region.parquet` t1, cp.`tpch/nation.parquet` t2 \n" +
             "where t1.r_regionKey = t2.n_nationKey \n" +
             "group by t1.r_regionKey";
     
    -    test(query);
    +    // Validate the plan
    +    final String[] expectedPlan = {"Window.*partition \\{0\\}.*SUM\\(\\$0\\)",
    +        "Scan.*columns=\\[`n_nationKey`\\].*",
    +        "Scan.*columns=\\[`n_nationKey`\\].*"};
    +    final String[] excludedPatterns = {"Scan.*columns=\\[`\\*`\\].*"};
    +    PlanTestBase.testPlanMatchingPatterns(query, expectedPlan, excludedPatterns);
    +
    +    testBuilder()
    +        .sqlQuery(query)
    +        .unOrdered()
    +        .baselineColumns("col")
    +        .baselineValues(0l)
    +        .baselineValues(1l)
    +        .baselineValues(2l)
    +        .baselineValues(3l)
    +        .baselineValues(4l)
    +        .build()
    +        .run();
       }
     
       @Test // DRILL-3298
       public void testCountEmptyPartitionByWithExchange() throws Exception {
         String query = String.format("select count(*) over (order by o_orderpriority) as
cnt from dfs.`%s/multilevel/parquet` where o_custkey < 100", TEST_RES_PATH);
         try {
    +      // Validate the plan
    +      final String[] expectedPlan = {"Window.*partition \\{\\} order by \\[0\\].*COUNT\\(\\)",
    +          "Scan.*columns=\\[`o_custkey`, `o_orderpriority`\\]"};
    +      final String[] excludedPatterns = {"Scan.*columns=\\[`\\*`\\]"};
    +      test("alter session set `planner.slice_target` = 1");
    --- End diff --
    
    This line seems to be redundant, since line 430 sets the same option, right?



> Restrict the types of window frames that can be specified
> ---------------------------------------------------------
>
>                 Key: DRILL-3188
>                 URL: https://issues.apache.org/jira/browse/DRILL-3188
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Query Planning & Optimization
>    Affects Versions: 1.0.0
>            Reporter: Victoria Markman
>            Assignee: Sean Hsuan-Yi Chu
>            Priority: Critical
>              Labels: window_function
>             Fix For: 1.1.0
>
>
> We don't support row range with window functions. So we should disable this functionality,
because currently we return the default frame result.
> The only frame we currently support is BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
> If you don't specify frame in Calcite, this is exactly what you get:
> {code}
> 0: jdbc:drill:schema=dfs> explain plan for select a2,b2,c2, cast(count(*) over(partition
by a2, substr(b2,1,2),c2 order by cast(a2 as double) + 100 ) as bigint) from t2 order by a2;
> +------+------+
> | text | json |
> +------+------+
> | 00-00    Screen
> 00-01      Project(a2=[$0], b2=[$1], c2=[$2], EXPR$3=[$3])
> 00-02        SelectionVectorRemover
> 00-03          Sort(sort0=[$0], dir0=[ASC])
> 00-04            Project(a2=[$0], b2=[$1], c2=[$2], EXPR$3=[$5])
> 00-05              Window(window#0=[window(partition {0, 2, 4} order by [3] range between
UNBOUNDED PRECEDING and CURRENT ROW aggs [COUNT()])])
> 00-06                SelectionVectorRemover
> 00-07                  Sort(sort0=[$0], sort1=[$2], sort2=[$4], sort3=[$3], dir0=[ASC],
dir1=[ASC], dir2=[ASC], dir3=[ASC])
> 00-08                    Project(a2=[$1], b2=[$0], c2=[$2], $3=[+(CAST($1):DOUBLE, 100)],
$4=[SUBSTR($0, 1, 2)])
> 00-09                      Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath
[path=maprfs:///drill/testdata/aggregation/t2]], selectionRoot=/drill/testdata/aggregation/t2,
numFiles=1, columns=[`a2`, `b2`, `c2`]]])
> {code}
> Row range should be disabled as well:
> {code}
> 0: jdbc:drill:schema=dfs> select * from t2;
> +-----+--------+-------------+
> | a2  |   b2   |     c2      |
> +-----+--------+-------------+
> | 0   | zzz    | 2014-12-31  |
> | 1   | aaaaa  | 2015-01-01  |
> | 2   | bbbbb  | 2015-01-02  |
> | 2   | bbbbb  | 2015-01-02  |
> | 2   | bbbbb  | 2015-01-02  |
> | 3   | ccccc  | 2015-01-03  |
> | 4   | ddddd  | 2015-01-04  |
> | 5   | eeeee  | 2015-01-05  |
> | 6   | fffff  | 2015-01-06  |
> | 7   | ggggg  | 2015-01-07  |
> | 7   | ggggg  | 2015-01-07  |
> | 8   | hhhhh  | 2015-01-08  |
> | 9   | iiiii  | 2015-01-09  |
> +-----+--------+-------------+
> 13 rows selected (0.123 seconds)
> 0: jdbc:drill:schema=dfs> select a2, sum(a2) over(partition by a2 order by a2 rows
between 1 preceding and 1 following ) from t2 order by a2;
> +-----+---------+
> | a2  | EXPR$1  |
> +-----+---------+
> | 0   | 0       |
> | 1   | 1       |
> | 2   | 6       |
> | 2   | 6       |
> | 2   | 6       |
> | 3   | 3       |
> | 4   | 4       |
> | 5   | 5       |
> | 6   | 6       |
> | 7   | 14      |
> | 7   | 14      |
> | 8   | 8       |
> | 9   | 9       |
> +-----+---------+
> 13 rows selected (0.2 seconds)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message