Return-Path: X-Original-To: apmail-drill-issues-archive@minotaur.apache.org Delivered-To: apmail-drill-issues-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id F12F810677 for ; Wed, 17 Dec 2014 01:23:13 +0000 (UTC) Received: (qmail 44860 invoked by uid 500); 17 Dec 2014 01:23:13 -0000 Delivered-To: apmail-drill-issues-archive@drill.apache.org Received: (qmail 44680 invoked by uid 500); 17 Dec 2014 01:23:13 -0000 Mailing-List: contact issues-help@drill.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@drill.apache.org Delivered-To: mailing list issues@drill.apache.org Received: (qmail 44524 invoked by uid 99); 17 Dec 2014 01:23:13 -0000 Received: from arcas.apache.org (HELO arcas.apache.org) (140.211.11.28) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Dec 2014 01:23:13 +0000 Date: Wed, 17 Dec 2014 01:23:13 +0000 (UTC) From: "Chun Chang (JIRA)" To: issues@drill.apache.org Message-ID: In-Reply-To: References: Subject: [jira] [Created] (DRILL-1880) Query with three where conditions returned wrong results MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 Chun Chang created DRILL-1880: --------------------------------- Summary: Query with three where conditions returned wrong results Key: DRILL-1880 URL: https://issues.apache.org/jira/browse/DRILL-1880 Project: Apache Drill Issue Type: Bug Components: Execution - Flow Affects Versions: 0.7.0 Reporter: Chun Chang #Mon Dec 15 11:37:23 EST 2014 git.commit.id.abbrev=3b0ff5d The following query containing three where conditions returned wrong results. (data is too big so not included here.) {code} SELECT t.gbyi, Count(t.id) FROM `complex.json` t WHERE t.gbyi <= 5 OR t.gbyi >= 11 AND t.gbyt <> 'ooof' GROUP BY t.gbyi ORDER BY t.gbyi; {code} Wrong result, the count column is mostly wrong: {code} 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.gbyi, count(t.id) from `complex.json` t where t.gbyi <= 5 or t.gbyi >= 11 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi; +------------+------------+ | gbyi | EXPR$1 | +------------+------------+ | 0 | 66943 | | 1 | 66318 | | 2 | 66994 | | 3 | 66683 | | 4 | 66638 | | 5 | 66439 | | 11 | 63172 | | 12 | 63008 | | 13 | 62685 | | 14 | 62970 | +------------+------------+ {code} Reduce the where condition to just two gives the correct result: {code} SELECT t.gbyi, Count(t.id) FROM `complex.json` t WHERE t.gbyi <= 5 AND t.gbyt <> 'ooof' GROUP BY t.gbyi ORDER BY t.gbyi; {code} {code} 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> select t.gbyi, count(t.id) from `complex.json` t where t.gbyi <= 5 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi; +------------+------------+ | gbyi | EXPR$1 | +------------+------------+ | 0 | 63305 | | 1 | 62671 | | 2 | 63249 | | 3 | 63070 | | 4 | 62967 | | 5 | 62737 | +------------+------------+ {code} physical plan for the query returned wrong result: {code} 0: jdbc:drill:schema=dfs.drillTestDirComplexJ> explain plan for select t.gbyi, count(t.id) from `complex.json` t where t.gbyi <= 5 or t.gbyi >= 11 and t.gbyt <> 'ooof' group by t.gbyi order by t.gbyi; +------------+------------+ | text | json | +------------+------------+ | 00-00 Screen 00-01 Project(gbyi=[$0], EXPR$1=[$1]) 00-02 SelectionVectorRemover 00-03 Sort(sort0=[$0], dir0=[ASC]) 00-04 HashAgg(group=[{0}], EXPR$1=[$SUM0($1)]) 00-05 HashAgg(group=[{0}], EXPR$1=[COUNT($1)]) 00-06 Project(gbyi=[$0], id=[$2]) 00-07 SelectionVectorRemover 00-08 Filter(condition=[OR(<=($0, 5), AND(>=($0, 11), <>($1, 'ooof')))]) 00-09 Project(gbyi=[$1], gbyt=[$2], id=[$0]) 00-10 Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`gbyi`, `gbyt`, `id`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]]) | { "head" : { "version" : 1, "generator" : { "type" : "ExplainHandler", "info" : "" }, "type" : "APACHE_DRILL_PHYSICAL", "options" : [ ], "queue" : 0, "resultMode" : "EXEC" }, "graph" : [ { "pop" : "fs-scan", "@id" : 10, "files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ], "storage" : { "type" : "file", "enabled" : true, "connection" : "maprfs:///", "workspaces" : { "root" : { "location" : "/", "writable" : false, "defaultInputFormat" : null }, "tmp" : { "location" : "/tmp", "writable" : true, "defaultInputFormat" : "csv" }, "drillTestDir" : { "location" : "/drill/testdata/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirComplexJson" : { "location" : "/drill/testdata/complex_type/json", "writable" : true, "defaultInputFormat" : "json" }, "drillTestDirAmplab" : { "location" : "/drill/testdata/amplab", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirInformationSchema" : { "location" : "/drill/testdata/information-schema", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirUdfs" : { "location" : "/drill/testdata/udfs/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirP1" : { "location" : "/drill/testdata/p1tests", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirTpch10Parquet" : { "location" : "/drill/testdata/tpch10", "writable" : true, "defaultInputFormat" : "parquet" }, "Join" : { "location" : "/drill/testdata/join", "writable" : true, "defaultInputFormat" : "parquet" }, "NoExtJson" : { "location" : "/drill/testdata/no-extension/json", "writable" : true, "defaultInputFormat" : "json" }, "NoExtParquet" : { "location" : "/drill/testdata/no-extension/parquet", "writable" : true, "defaultInputFormat" : "parquet" }, "NoExtParquetNull" : { "location" : "/drill/testdata/no-extension/parquet", "writable" : true, "defaultInputFormat" : null }, "NoExtText" : { "location" : "/drill/testdata/no-extension/text", "writable" : true, "defaultInputFormat" : "psv" }, "drillTestDirExchanges" : { "location" : "/drill/testdata/exchanges_test", "writable" : true, "defaultInputFormat" : "parquet" }, "TpcHMulti" : { "location" : "/drill/testdata/tpch-multi", "writable" : true, "defaultInputFormat" : "parquet" }, "TpcHMulti100" : { "location" : "/drill/testdata/SF100", "writable" : true, "defaultInputFormat" : "parquet" }, "TpcHMulti1" : { "location" : "/drill/testdata/tpch_SF1", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirExplicit" : { "location" : "/drill/testdata/explicit_cast", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirImplicit" : { "location" : "/drill/testdata/implicit_cast", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirImplicit1" : { "location" : "/drill/testdata/implicit_cast", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirTPCDS" : { "location" : "/user/root/tpcds/parquet", "writable" : true, "defaultInputFormat" : "parquet" }, "TPCDS" : { "location" : "/drill/testdata/tpcds", "writable" : true, "defaultInputFormat" : "parquet" }, "drillMondrian" : { "location" : "/user/root/mondrian", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirDatetime" : { "location" : "/drill/testdata/datetime/datasources", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirViews" : { "location" : "/drill/testdata/views/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirNumerical" : { "location" : "/drill/testdata/numerical/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirJson" : { "location" : "/drill/testdata/json_storage/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTestNewWS" : { "location" : "/drill/testdata/newWS/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch01Text" : { "location" : "/drill/testdata/Tpch0.01/text/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch01Json" : { "location" : "/drill/testdata/Tpch0.01/json/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch01Parquet" : { "location" : "/drill/testdata/Tpch0.01/parquet/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirConvert" : { "location" : "/drill/testdata/convert", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch100Text" : { "location" : "/drill/testdata/tpch100/text/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch100Parquet" : { "location" : "/drill/testdata/tpch100/parquet", "writable" : true, "defaultInputFormat" : null }, "drillTestDirAggregate1parquet" : { "location" : "/drill/testdata/tpcds/parquet/s1", "writable" : true, "defaultInputFormat" : null }, "drillTestDirAggregate1csv" : { "location" : "/drill/testdata/tpcds/csv/s1", "writable" : true, "defaultInputFormat" : null }, "drillTestDirAggregate1json" : { "location" : "/drill/testdata/tpcds/json/s1", "writable" : true, "defaultInputFormat" : null }, "drillTestDirMondrian" : { "location" : "/drill/testdata/mondrian", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpcdsImpalaSF1" : { "location" : "/drill/testdata/tpcds-impala-sf1", "writable" : true, "defaultInputFormat" : null }, "sandbox" : { "location" : "/sandbox", "writable" : true, "defaultInputFormat" : "parquet" }, "sandbox-logs" : { "location" : "/sandbox/flat", "writable" : true, "defaultInputFormat" : "parquet" }, "sandbox-json" : { "location" : "/sandbox/json", "writable" : true, "defaultInputFormat" : "parquet" } }, "formats" : { "psv" : { "type" : "text", "extensions" : [ "tbl" ], "delimiter" : "|" }, "dsv" : { "type" : "text", "extensions" : [ "dat" ], "delimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], "delimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], "delimiter" : "\t" }, "parquet" : { "type" : "parquet" }, "json" : { "type" : "json" } } }, "format" : { "type" : "json" }, "columns" : [ "`gbyi`", "`gbyt`", "`id`" ], "selectionRoot" : "/drill/testdata/complex_type/json/complex.json", "cost" : 1186767.0 }, { "pop" : "project", "@id" : 9, "exprs" : [ { "ref" : "`gbyi`", "expr" : "`gbyi`" }, { "ref" : "`gbyt`", "expr" : "`gbyt`" }, { "ref" : "`id`", "expr" : "`id`" } ], "child" : 10, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1186767.0 }, { "pop" : "filter", "@id" : 8, "child" : 9, "expr" : "booleanOr(less_than_or_equal_to(`gbyi`, 5) , booleanAnd(greater_than_or_equal_to(`gbyi`, 11) , not_equal(`gbyt`, 'ooof') ) ) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 296691.75 }, { "pop" : "selection-vector-remover", "@id" : 7, "child" : 8, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 296691.75 }, { "pop" : "project", "@id" : 6, | +------------+------------+ {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)