drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Chun Chang (JIRA)" <j...@apache.org>
Subject [jira] [Created] (DRILL-1434) count() on null value gives zero
Date Thu, 18 Sep 2014 17:30:34 GMT
Chun Chang created DRILL-1434:
---------------------------------

             Summary: count() on null value gives zero
                 Key: DRILL-1434
                 URL: https://issues.apache.org/jira/browse/DRILL-1434
             Project: Apache Drill
          Issue Type: Bug
          Components: Functions - Drill
    Affects Versions: 0.6.0
            Reporter: Chun Chang


code base 
#Fri Sep 12 14:08:02 PDT 2014
git.commit.id.abbrev=9e16466

I have a parquet file (tpcds data) which contains null values in a column. The total count
of the column is:

0: jdbc:drill:schema=dfs> select count(ss_quantity) from `tpcds/p1/store_sales.parquet`;
+------------+
|   EXPR$0   |
+------------+
| 2880404    |
+------------+

The count excluding null values is:

0: jdbc:drill:schema=dfs> select count(ss_quantity) from `tpcds/p1/store_sales.parquet`
where ss_quantity is not null;
+------------+
|   EXPR$0   |
+------------+
| 2750408    |
+------------+

But the count of null values is zero:

0: jdbc:drill:schema=dfs> select count(ss_quantity) from `tpcds/p1/store_sales.parquet`
where ss_quantity is null;
+------------+
|   EXPR$0   |
+------------+
| 0          |
+------------+

Here is what the physical plan looks like for this query:

0: jdbc:drill:schema=dfs> explain plan for select count(ss_quantity) from `tpcds/p1/store_sales.parquet`
where ss_quantity is null;
+------------+------------+
|    text    |    json    |
+------------+------------+
| 00-00    Screen
00-01      StreamAgg(group=[{}], EXPR$0=[COUNT($0)])
00-02        Filter(condition=[IS NULL($0)])
00-03          ProducerConsumer
00-04            Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/user/root/mondrian/tpcds/p1/store_sales.parquet]],
selectionRoot=/user/root/mondrian/tpcds/p1/store_sales.parquet, columns=[SchemaPath [`ss_quantity`]]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "parquet-scan",
    "@id" : 4,
    "entries" : [ {
      "path" : "maprfs:/user/root/mondrian/tpcds/p1/store_sales.parquet"
    } ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "default" : {
          "location" : "/user/root/mondrian/",
          "writable" : true,
          "storageformat" : null
        },
        "home" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "abhi" : {
          "location" : "/tables",
          "writable" : true,
          "storageformat" : "csv"
        },
        "chun" : {
          "location" : "/drill/testdata/chun/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "parquet"
    },
    "columns" : [ "`ss_quantity`" ],
    "selectionRoot" : "/user/root/mondrian/tpcds/p1/store_sales.parquet",
    "cost" : 2880404.0
  }, {
    "pop" : "producer-consumer",
    "@id" : 3,
    "child" : 4,
    "size" : 10,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 2880404.0
  }, {
    "pop" : "filter",
    "@id" : 2,
    "child" : 3,
    "expr" : "isnull(`ss_quantity`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 720101.0
  }, {
    "pop" : "streaming-aggregate",
    "@id" : 1,
    "child" : 2,
    "keys" : [ ],
    "exprs" : [ {
      "ref" : "`EXPR$0`",
      "expr" : "count(`ss_quantity`) "
    } ],
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1.0
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 72010.1
  } ]
} |
+------------+------------+



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message