drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Jacques Nadeau (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (DRILL-1880) Query with three where conditions returned wrong results
Date Sun, 28 Dec 2014 21:16:15 GMT

     [ https://issues.apache.org/jira/browse/DRILL-1880?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Jacques Nadeau updated DRILL-1880:
----------------------------------
    Fix Version/s: 0.8.0

> Query with three where conditions returned wrong results
> --------------------------------------------------------
>
>                 Key: DRILL-1880
>                 URL: https://issues.apache.org/jira/browse/DRILL-1880
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Execution - Flow
>    Affects Versions: 0.7.0
>            Reporter: Chun Chang
>             Fix For: 0.8.0
>
>
> #Mon Dec 15 11:37:23 EST 2014
> git.commit.id.abbrev=3b0ff5d
> The following query containing three where conditions returned wrong results. (data is
too big so not included here.)
> {code}
> SELECT t.gbyi, 
>        Count(t.id) 
> FROM   `complex.json` t 
> WHERE  t.gbyi <= 5 
>         OR t.gbyi >= 11 
>            AND t.gbyt <> 'ooof' 
> GROUP  BY t.gbyi 
> ORDER  BY t.gbyi; 
> {code}
> Wrong result, the count column is mostly wrong:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.gbyi, count(t.id) from `complex.json`
t where t.gbyi <= 5 or t.gbyi >= 11 and t.gbyt <> 'ooof' group by t.gbyi order
by t.gbyi;
> +------------+------------+
> |    gbyi    |   EXPR$1   |
> +------------+------------+
> | 0          | 66943      |
> | 1          | 66318      |
> | 2          | 66994      |
> | 3          | 66683      |
> | 4          | 66638      |
> | 5          | 66439      |
> | 11         | 63172      |
> | 12         | 63008      |
> | 13         | 62685      |
> | 14         | 62970      |
> +------------+------------+
> {code}
> Reduce the where condition to just two gives the correct result:
> {code}
> SELECT t.gbyi, 
>        Count(t.id) 
> FROM   `complex.json` t 
> WHERE  t.gbyi <= 5 
>        AND t.gbyt <> 'ooof' 
> GROUP  BY t.gbyi 
> ORDER  BY t.gbyi; 
> {code}
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.gbyi, count(t.id) from `complex.json`
t where t.gbyi <= 5 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi;
> +------------+------------+
> |    gbyi    |   EXPR$1   |
> +------------+------------+
> | 0          | 63305      |
> | 1          | 62671      |
> | 2          | 63249      |
> | 3          | 63070      |
> | 4          | 62967      |
> | 5          | 62737      |
> +------------+------------+
> {code}
> physical plan for the query returned wrong result:
> {code}
> 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> explain plan for select t.gbyi, count(t.id)
from `complex.json` t where t.gbyi <= 5 or t.gbyi >= 11 and t.gbyt <> 'ooof' group
by t.gbyi order by t.gbyi;
> +------------+------------+
> |    text    |    json    |
> +------------+------------+
> | 00-00    Screen
> 00-01      Project(gbyi=[$0], EXPR$1=[$1])
> 00-02        SelectionVectorRemover
> 00-03          Sort(sort0=[$0], dir0=[ASC])
> 00-04            HashAgg(group=[{0}], EXPR$1=[$SUM0($1)])
> 00-05              HashAgg(group=[{0}], EXPR$1=[COUNT($1)])
> 00-06                Project(gbyi=[$0], id=[$2])
> 00-07                  SelectionVectorRemover
> 00-08                    Filter(condition=[OR(<=($0, 5), AND(>=($0, 11), <>($1,
'ooof')))])
> 00-09                      Project(gbyi=[$1], gbyt=[$2], id=[$0])
> 00-10                        Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json,
numFiles=1, columns=[`gbyi`, `gbyt`, `id`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]])
>  | {
>   "head" : {
>     "version" : 1,
>     "generator" : {
>       "type" : "ExplainHandler",
>       "info" : ""
>     },
>     "type" : "APACHE_DRILL_PHYSICAL",
>     "options" : [ ],
>     "queue" : 0,
>     "resultMode" : "EXEC"
>   },
>   "graph" : [ {
>     "pop" : "fs-scan",
>     "@id" : 10,
>     "files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ],
>     "storage" : {
>       "type" : "file",
>       "enabled" : true,
>       "connection" : "maprfs:///",
>       "workspaces" : {
>         "root" : {
>           "location" : "/",
>           "writable" : false,
>           "defaultInputFormat" : null
>         },
>         "tmp" : {
>           "location" : "/tmp",
>           "writable" : true,
>           "defaultInputFormat" : "csv"
>         },
>         "drillTestDir" : {
>           "location" : "/drill/testdata/",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirComplexJson" : {
>           "location" : "/drill/testdata/complex_type/json",
>           "writable" : true,
>           "defaultInputFormat" : "json"
>         },
>         "drillTestDirAmplab" : {
>           "location" : "/drill/testdata/amplab",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirInformationSchema" : {
>           "location" : "/drill/testdata/information-schema",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirUdfs" : {
>           "location" : "/drill/testdata/udfs/",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirP1" : {
>           "location" : "/drill/testdata/p1tests",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirTpch10Parquet" : {
>           "location" : "/drill/testdata/tpch10",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "Join" : {
>           "location" : "/drill/testdata/join",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "NoExtJson" : {
>           "location" : "/drill/testdata/no-extension/json",
>           "writable" : true,
>           "defaultInputFormat" : "json"
>         },
>         "NoExtParquet" : {
>           "location" : "/drill/testdata/no-extension/parquet",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "NoExtParquetNull" : {
>           "location" : "/drill/testdata/no-extension/parquet",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "NoExtText" : {
>           "location" : "/drill/testdata/no-extension/text",
>           "writable" : true,
>           "defaultInputFormat" : "psv"
>         },
>         "drillTestDirExchanges" : {
>           "location" : "/drill/testdata/exchanges_test",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "TpcHMulti" : {
>           "location" : "/drill/testdata/tpch-multi",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "TpcHMulti100" : {
>           "location" : "/drill/testdata/SF100",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "TpcHMulti1" : {
>           "location" : "/drill/testdata/tpch_SF1",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirExplicit" : {
>           "location" : "/drill/testdata/explicit_cast",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirImplicit" : {
>           "location" : "/drill/testdata/implicit_cast",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirImplicit1" : {
>           "location" : "/drill/testdata/implicit_cast",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirTPCDS" : {
>           "location" : "/user/root/tpcds/parquet",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "TPCDS" : {
>           "location" : "/drill/testdata/tpcds",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillMondrian" : {
>           "location" : "/user/root/mondrian",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirDatetime" : {
>           "location" : "/drill/testdata/datetime/datasources",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirViews" : {
>           "location" : "/drill/testdata/views/",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirNumerical" : {
>           "location" : "/drill/testdata/numerical/",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "drillTestDirJson" : {
>           "location" : "/drill/testdata/json_storage/",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTestNewWS" : {
>           "location" : "/drill/testdata/newWS/",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTpch01Text" : {
>           "location" : "/drill/testdata/Tpch0.01/text/",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTpch01Json" : {
>           "location" : "/drill/testdata/Tpch0.01/json/",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTpch01Parquet" : {
>           "location" : "/drill/testdata/Tpch0.01/parquet/",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirConvert" : {
>           "location" : "/drill/testdata/convert",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTpch100Text" : {
>           "location" : "/drill/testdata/tpch100/text/",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTpch100Parquet" : {
>           "location" : "/drill/testdata/tpch100/parquet",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirAggregate1parquet" : {
>           "location" : "/drill/testdata/tpcds/parquet/s1",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirAggregate1csv" : {
>           "location" : "/drill/testdata/tpcds/csv/s1",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirAggregate1json" : {
>           "location" : "/drill/testdata/tpcds/json/s1",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirMondrian" : {
>           "location" : "/drill/testdata/mondrian",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "drillTestDirTpcdsImpalaSF1" : {
>           "location" : "/drill/testdata/tpcds-impala-sf1",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "sandbox" : {
>           "location" : "/sandbox",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "sandbox-logs" : {
>           "location" : "/sandbox/flat",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         },
>         "sandbox-json" : {
>           "location" : "/sandbox/json",
>           "writable" : true,
>           "defaultInputFormat" : "parquet"
>         }
>       },
>       "formats" : {
>         "psv" : {
>           "type" : "text",
>           "extensions" : [ "tbl" ],
>           "delimiter" : "|"
>         },
>         "dsv" : {
>           "type" : "text",
>           "extensions" : [ "dat" ],
>           "delimiter" : "|"
>         },
>         "csv" : {
>           "type" : "text",
>           "extensions" : [ "csv" ],
>           "delimiter" : ","
>         },
>         "tsv" : {
>           "type" : "text",
>           "extensions" : [ "tsv" ],
>           "delimiter" : "\t"
>         },
>         "parquet" : {
>           "type" : "parquet"
>         },
>         "json" : {
>           "type" : "json"
>         }
>       }
>     },
>     "format" : {
>       "type" : "json"
>     },
>     "columns" : [ "`gbyi`", "`gbyt`", "`id`" ],
>     "selectionRoot" : "/drill/testdata/complex_type/json/complex.json",
>     "cost" : 1186767.0
>   }, {
>     "pop" : "project",
>     "@id" : 9,
>     "exprs" : [ {
>       "ref" : "`gbyi`",
>       "expr" : "`gbyi`"
>     }, {
>       "ref" : "`gbyt`",
>       "expr" : "`gbyt`"
>     }, {
>       "ref" : "`id`",
>       "expr" : "`id`"
>     } ],
>     "child" : 10,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 1186767.0
>   }, {
>     "pop" : "filter",
>     "@id" : 8,
>     "child" : 9,
>     "expr" : "booleanOr(less_than_or_equal_to(`gbyi`, 5) , booleanAnd(greater_than_or_equal_to(`gbyi`,
11) , not_equal(`gbyt`, 'ooof') ) ) ",
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 296691.75
>   }, {
>     "pop" : "selection-vector-remover",
>     "@id" : 7,
>     "child" : 8,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 296691.75
>   }, {
>     "pop" : "project",
>     "@id" : 6,
>    |
> +------------+------------+
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message