asterixdb-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Yingyi Bu (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (ASTERIXDB-1332) Query fails in rewrite phase with stack overflow
Date Fri, 04 Mar 2016 20:19:40 GMT

    [ https://issues.apache.org/jira/browse/ASTERIXDB-1332?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15180487#comment-15180487
] 

Yingyi Bu commented on ASTERIXDB-1332:
--------------------------------------

Pouria,

     The original query can be compiled in the current master.  However, there are several
nested loop joins in the resulting optimized plan.
     The reason is that:
     1. some join rewriting rules might not be invoked after subquery decorrelation.  I'll
investigate that.
     2. "//where $s1.s_suppkey = $l.l_suppkey" was commented out.

    You can get unblocked by rewriting the query manually a little bit as follows:
    {noformat}
for $profit in (
  for $o in dataset NestedOrders
  for $l3 in (
    for $p in dataset('Part')
    for $l2 in (
      for $ps in dataset('Partsupp')
      for $l1 in (
        for $s1 in (
          for $s in dataset('Supplier')
          for $n in dataset('Nation')
          where $n.n_nationkey = $s.s_nationkey
          return {
            "s_suppkey": $s.s_suppkey,
            "n_name": $n.n_name
          }
        ) //s1
        //for $l in dataset('LineItem')
        for $o in dataset NestedOrders
        for $l in $o.o_lineitems
        where $s1.s_suppkey = $l.l_suppkey       
        return  {
          "l_suppkey": $l.l_suppkey,
          "l_extendedprice": $l.l_extendedprice,
          "l_discount": $l.l_discount,
          "l_quantity": $l.l_quantity,
          "l_partkey": $l.l_partkey,
          "l_orderkey": $o.o_orderkey, //$l.l_orderkey,
          "n_name": $s1.n_name
        }
      ) //l1
      where $ps.ps_suppkey = $l1.l_suppkey and $ps.ps_partkey = $l1.l_partkey       
      return {
        "l_extendedprice": $l1.l_extendedprice,
        "l_discount": $l1.l_discount,
        "l_quantity": $l1.l_quantity,
        "l_partkey": $l1.l_partkey,
        "l_orderkey": $l1.l_orderkey,
        "n_name": $l1.n_name,
        "ps_supplycost": $ps.ps_supplycost
      }
    ) //l2
    where contains($p.p_name, 'green') and $p.p_partkey = $l2.l_partkey    
    return {
      "l_extendedprice": $l2.l_extendedprice,
      "l_discount": $l2.l_discount,
      "l_quantity": $l2.l_quantity,
      "l_orderkey": $l2.l_orderkey,
      "n_name": $l2.n_name,
      "ps_supplycost": $l2.ps_supplycost
    }
  ) //l3
  where $o.o_orderkey = $l3.l_orderkey
  let $amount := $l3.l_extendedprice * (1 - $l3.l_discount) -  $l3.ps_supplycost * $l3.l_quantity

  let $o_year := get-year($o.o_orderdate)
  return {
    "nation": $l3.n_name,
    "o_year": $o_year,
    "amount": $amount
  }
) //profit
group by $nation := $profit.nation, $o_year := $profit.o_year with $profit
order by $nation, $o_year desc
return {
  "nation": $nation, 
  "o_year": $o_year, 
  "sum_profit": sum( for $pr in $profit return $pr.amount )
}
    {noformat}    


   Here is the resulting query plan:
  {noformat}
  -- DISTRIBUTE_RESULT  |PARTITIONED|
  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
    -- STREAM_PROJECT  |PARTITIONED|
      -- ASSIGN  |PARTITIONED|
        -- SORT_MERGE_EXCHANGE [$$14(ASC), $$15(DESC) ]  |PARTITIONED|
          -- PRE_CLUSTERED_GROUP_BY[$$148, $$149]  |PARTITIONED|
                  {
                    -- AGGREGATE  |LOCAL|
                      -- NESTED_TUPLE_SOURCE  |LOCAL|
                  }
            -- HASH_PARTITION_MERGE_EXCHANGE MERGE:[$$148(ASC), $$149(DESC)] HASH:[$$148,
$$149]  |PARTITIONED|
              -- SORT_GROUP_BY[$$121, $$98]  |PARTITIONED|
                      {
                        -- AGGREGATE  |LOCAL|
                          -- NESTED_TUPLE_SOURCE  |LOCAL|
                      }
                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                  -- STREAM_PROJECT  |PARTITIONED|
                    -- ASSIGN  |PARTITIONED|
                      -- STREAM_PROJECT  |PARTITIONED|
                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                          -- HYBRID_HASH_JOIN [$$100][$$106]  |PARTITIONED|
                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                              -- STREAM_PROJECT  |PARTITIONED|
                                -- ASSIGN  |PARTITIONED|
                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                    -- DATASOURCE_SCAN  |PARTITIONED|
                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                        -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
                            -- HASH_PARTITION_EXCHANGE [$$106]  |PARTITIONED|
                              -- STREAM_PROJECT  |PARTITIONED|
                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                  -- HYBRID_HASH_JOIN [$$101][$$126]  |PARTITIONED|
                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                      -- STREAM_PROJECT  |PARTITIONED|
                                        -- STREAM_SELECT  |PARTITIONED|
                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                            -- DATASOURCE_SCAN  |PARTITIONED|
                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
                                    -- HASH_PARTITION_EXCHANGE [$$126]  |PARTITIONED|
                                      -- STREAM_PROJECT  |PARTITIONED|
                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                          -- HYBRID_HASH_JOIN [$$102, $$103][$$126, $$107]
 |PARTITIONED|
                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                              -- STREAM_PROJECT  |PARTITIONED|
                                                -- ASSIGN  |PARTITIONED|
                                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                    -- DATASOURCE_SCAN  |PARTITIONED|
                                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                        -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
                                            -- HASH_PARTITION_EXCHANGE [$$126, $$107]  |PARTITIONED|
                                              -- STREAM_PROJECT  |PARTITIONED|
                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                  -- HYBRID_HASH_JOIN [$$104][$$107]  |PARTITIONED|
                                                    -- HASH_PARTITION_EXCHANGE [$$104]  |PARTITIONED|
                                                      -- STREAM_PROJECT  |PARTITIONED|
                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                          -- HYBRID_HASH_JOIN [$$110][$$105]
 |PARTITIONED|
                                                            -- HASH_PARTITION_EXCHANGE [$$110]
 |PARTITIONED|
                                                              -- STREAM_PROJECT  |PARTITIONED|
                                                                -- ASSIGN  |PARTITIONED|
                                                                  -- ONE_TO_ONE_EXCHANGE 
|PARTITIONED|
                                                                    -- DATASOURCE_SCAN  |PARTITIONED|
                                                                      -- ONE_TO_ONE_EXCHANGE
 |PARTITIONED|
                                                                        -- EMPTY_TUPLE_SOURCE
 |PARTITIONED|
                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                              -- STREAM_PROJECT  |PARTITIONED|
                                                                -- ASSIGN  |PARTITIONED|
                                                                  -- ONE_TO_ONE_EXCHANGE 
|PARTITIONED|
                                                                    -- DATASOURCE_SCAN  |PARTITIONED|
                                                                      -- ONE_TO_ONE_EXCHANGE
 |PARTITIONED|
                                                                        -- EMPTY_TUPLE_SOURCE
 |PARTITIONED|
                                                    -- HASH_PARTITION_EXCHANGE [$$107]  |PARTITIONED|
                                                      -- STREAM_PROJECT  |PARTITIONED|
                                                        -- ASSIGN  |PARTITIONED|
                                                          -- STREAM_PROJECT  |PARTITIONED|
                                                            -- UNNEST  |PARTITIONED|
                                                              -- STREAM_PROJECT  |PARTITIONED|
                                                                -- ASSIGN  |PARTITIONED|
                                                                  -- ONE_TO_ONE_EXCHANGE 
|PARTITIONED|
                                                                    -- DATASOURCE_SCAN  |PARTITIONED|
                                                                      -- ONE_TO_ONE_EXCHANGE
 |PARTITIONED|
                                                                        -- EMPTY_TUPLE_SOURCE
 |PARTITIONED|
  {noformat}

Best,
Yingyi


> Query fails in rewrite phase with stack overflow
> ------------------------------------------------
>
>                 Key: ASTERIXDB-1332
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1332
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: Optimizer
>            Reporter: Pouria
>
> A rewritten version of TPCH Q9 (for nested schema) breaks in re-write step:
> //The error on cc.log
> {noformat}
> 2016-03-04 10:30:04,642 WARN  [qtp85748029-40] util.log (JCLLoggerAdapter.java:warn(437))
- Error for /
> java.lang.StackOverflowError
> 	at org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarTypeFullList(PropagatingTypeEnvironment.java:84)
> 	at org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarType(PropagatingTypeEnvironment.java:73)
> 	at org.apache.hyracks.algebricks.core.algebra.properties.TypePropagationPolicy$1.getVarType(TypePropagationPolicy.java:42)
> 	at org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarTypeFullList(PropagatingTypeEnvironment.java:88)
> 	at org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarType(PropagatingTypeEnvironment.java:73)
> 	at org.apache.hyracks.algebricks.core.algebra.properties.TypePropagationPolicy$1.getVarType(TypePropagationPolicy.java:42)
> 	at org.
> [… It repeats for tens of lines]
> {noformat}
> //Here is the Query:
> {noformat}
> for $profit in (
>   for $o in dataset NestedOrders
>   for $l3 in (
>     for $p in dataset('Part')
>     for $l2 in (
>       for $ps in dataset('Partsupp')
>       for $l1 in (
>         for $s1 in (
>           for $s in dataset('Supplier')
>           for $n in dataset('Nation')
>           where $n.n_nationkey = $s.s_nationkey
>           return {
>             "s_suppkey": $s.s_suppkey,
>             "n_name": $n.n_name
>           }
>         ) //s1
>         //for $l in dataset('LineItem')
>         for $l in $o.o_lineitems
>         //where $s1.s_suppkey = $l.l_suppkey       
>         return  {
>           "l_suppkey": $l.l_suppkey,
>           "l_extendedprice": $l.l_extendedprice,
>           "l_discount": $l.l_discount,
>           "l_quantity": $l.l_quantity,
>           "l_partkey": $l.l_partkey,
>           "l_orderkey": $o.o_orderkey, //$l.l_orderkey,
>           "n_name": $s1.n_name
>         }
>       ) //l1
>       where $ps.ps_suppkey = $l1.l_suppkey and $ps.ps_partkey = $l1.l_partkey       
>       return {
>         "l_extendedprice": $l1.l_extendedprice,
>         "l_discount": $l1.l_discount,
>         "l_quantity": $l1.l_quantity,
>         "l_partkey": $l1.l_partkey,
>         "l_orderkey": $l1.l_orderkey,
>         "n_name": $l1.n_name,
>         "ps_supplycost": $ps.ps_supplycost
>       }
>     ) //l2
>     where contains($p.p_name, 'green') and $p.p_partkey = $l2.l_partkey    
>     return {
>       "l_extendedprice": $l2.l_extendedprice,
>       "l_discount": $l2.l_discount,
>       "l_quantity": $l2.l_quantity,
>       "l_orderkey": $l2.l_orderkey,
>       "n_name": $l2.n_name,
>       "ps_supplycost": $l2.ps_supplycost
>     }
>   ) //l3
>   where $o.o_orderkey = $l3.l_orderkey
>   let $amount := $l3.l_extendedprice * (1 - $l3.l_discount) -  $l3.ps_supplycost * $l3.l_quantity

>   let $o_year := get-year($o.o_orderdate)
>   return {
>     "nation": $l3.n_name,
>     "o_year": $o_year,
>     "amount": $amount
>   }
> ) //profit
> group by $nation := $profit.nation, $o_year := $profit.o_year with $profit
> order by $nation, $o_year desc
> return {
>   "nation": $nation, 
>   "o_year": $o_year, 
>   "sum_profit": sum( for $pr in $profit return $pr.amount )
> }
> //Here is the Schema
> create type NestedLineItemType as {
>   l_partkey: int64,
>   l_suppkey: int64,
>   l_linenumber: int32,
>   l_quantity: int32,
>   l_extendedprice: double,
>   l_discount: double,
>   l_tax: double,
>   l_returnflag: string,
>   l_linestatus: string,
>   l_shipdate: string,
>   l_commitdate: string,
>   l_receiptdate: string,
>   l_shipinstruct: string,
>   l_shipmode: string,
>   l_comment: string
> }
> create type NestedOrderType as {
>   o_orderkey: int64,
>   o_custkey: int64,
>   o_orderstatus: string,
>   o_totalprice: double,
>   o_orderdate: string,
>   o_orderpriority: string,
>   o_clerk: string,
>   o_shippriority: int32,
>   o_comment: string,
>   o_lineitems: [ NestedLineItemType ]
> }
> create type CustomerType as {
>   c_custkey: int64,
>   c_name: string,
>   c_address: string,
>   c_nationkey: int32,
>   c_phone: string,
>   c_acctbal: double,
>   c_mktsegment: string,
>   c_comment: string
> }
> create type PartType as {
>   p_partkey: int64, 
>   p_name: string,
>   p_mfgr: string,
>   p_brand: string,
>   p_type: string,
>   p_size: int32,
>   p_container: string,
>   p_retailprice: double,
>   p_comment: string
> }
> create type PartSuppType as {
>   ps_partkey: int64,
>   ps_suppkey: int64,
>   ps_availqty: int32,
>   ps_supplycost: double,
>   ps_comment: string
> }
> create type SupplierType as {
>   s_suppkey: int64,
>   s_name: string,
>   s_address: string,
>   s_nationkey: int32,
>   s_phone: string,
>   s_acctbal: double,
>   s_comment: string
> }
> create type NationType as {
>   n_nationkey: int32,
>   n_name: string,
>   n_regionkey: int32,
>   n_comment: string
> }
> create type RegionType as {
>   r_regionkey: int32,
>   r_name: string,
>   r_comment: string
> }
> create dataset NestedOrders(NestedOrderType)  primary key o_orderkey;
> create dataset Customer(CustomerType) primary key c_custkey;
> create dataset Part(PartType)         primary key p_partkey;
> create dataset Partsupp(PartSuppType) primary key ps_partkey, ps_suppkey;
> create dataset Supplier(SupplierType) primary key s_suppkey;
> create dataset Region(RegionType)     primary key r_regionkey;
> create dataset Nation(NationType)     primary key n_nationkey;
> create index nation_fk_region on Nation(n_regionkey);
> create index supplier_fk_nation on Supplier (s_nationkey);
> create index partsupp_fk_part on Partsupp (ps_partkey);
> create index partsupp_fk_supplier on Partsupp (ps_suppkey);
> create index customer_fk_nation on Customer (c_nationkey);
> create index nestedOrders_fk_customer on NestedOrders (o_custkey);
> create index nestedOrders_orderdateIx on NestedOrders (o_orderdate);
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message