drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Bob Rumsby (JIRA)" <j...@apache.org>
Subject [jira] [Created] (DRILL-1417) Query against directory of JSON files no longer works in 0.5
Date Sun, 14 Sep 2014 03:58:33 GMT
Bob Rumsby created DRILL-1417:
---------------------------------

             Summary: Query against directory of JSON files no longer works in 0.5
                 Key: DRILL-1417
                 URL: https://issues.apache.org/jira/browse/DRILL-1417
             Project: Apache Drill
          Issue Type: Bug
          Components: Query Planning & Optimization
    Affects Versions: 0.5.0
         Environment: Apache Drill Sandbox (http://builds.qa.lab/vm/ova/?C=M;O=A) running
on a VirtualBox VM.

You can also access the same logs data set as follows:
ssh root@andypdemo0 [pswd = mapr]
cd /mapr/demorig/data/flat
            Reporter: Bob Rumsby


This query worked on an earlier build (somewhere between the 0.4 and 0.5 releases) but
no longer works in 0.5:

0: jdbc:drill:> select dir1 month_no, count(*) month_count from logs where dir0=2014 group
by dir1 order by dir1;
Query failed: Failure while running fragment. json record reader requires at least a column
[90feae61-fcfa-427f-b0bf-4a06563aa9a9]

Error: exception while executing query: Failure while trying to get next result batch. (state=,code=0)

Let me know if I should open a Jira, or if I need to rewrite the query. I am using the latest
build of the MapR Sandbox for Drill from http://builds.qa.lab/vm/ova/?C=M;O=A

The result of the same query on the earlier build was:

0: jdbc:drill:> select dir1 month_no, count(*) month_count from logs where dir0=2014 group
by dir1 order by dir1;
+------------+-------------+
|  month_no  | month_count |
+------------+-------------+
| 1          | 1741        |
| 2          | 1538        |
| 3          | 1689        |
| 4          | 1675        |
| 5          | 1738        |
| 6          | 1653        |
| 7          | 1745        |
| 8          | 221         |
+------------+-------------+
8 rows selected

EXPLAIN TEXT
---------------------
0: jdbc:drill:> explain plan for select dir1 month_no, count(*) month_count from logs where
dir0=2014 group by dir1 order by dir1;
+------------+------------+
|    text    |    json    |
+------------+------------+
| 00-00    Screen
00-01      Project(month_no=[$0], month_count=[$1])
00-02        SelectionVectorRemover
00-03          Sort(sort0=[$0], dir0=[ASC])
00-04            Project(month_no=[$0], month_count=[$1])
00-05              HashAgg(group=[{0}], month_count=[COUNT()])
00-06                Project(dir1=[$0])
00-07                  SelectionVectorRemover
00-08                    Filter(condition=[=(CAST($1):INTEGER, 2014)])
00-09                      Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/demo.mapr.com/data/flat/logs,
columns = [SchemaPath [`dir1`], SchemaPath [`dir0`]]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "fs-scan",
    "@id" : 9,
    "files" : [ "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/8/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/11/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/6/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/12/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/3/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/4/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/5/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/1/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/7/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/2/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/9/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/10/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/8/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/11/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/6/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/12/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/3/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/4/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/5/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/1/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/7/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/2/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/9/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/10/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/8/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/6/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/3/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/4/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/5/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/1/log.json", "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/7/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/2/log.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "data" : {
          "location" : "/mapr/demo.mapr.com/data",
          "writable" : false,
          "storageformat" : null
        },
        "clicks" : {
          "location" : "/mapr/demo.mapr.com/data/nested",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "logs" : {
          "location" : "/mapr/demo.mapr.com/data/flat",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "views" : {
          "location" : "/mapr/demo.mapr.com/data/views",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`dir1`", "`dir0`" ],
    "selectionRoot" : "/mapr/demo.mapr.com/data/flat/logs",
    "cost" : 7731.0
  }, {
    "pop" : "filter",
    "@id" : 8,
    "child" : 9,
    "expr" : "equal(cast( (`dir0` ) as INT ), 2014) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1159.6499999999999
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 7,
    "child" : 8,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1159.6499999999999
  }, {
    "pop" : "project",
    "@id" : 6,
    "exprs" : [ {
      "ref" : "`dir1`",
      "expr" : "`dir1`"
    } ],
    "child" : 7,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1159.6499999999999
  }, {
    "pop" : "hash-aggregate",
    "@id" : 5,
    "child" : 6,
    "cardinality" : 1.0,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 579.8249999999999,
    "groupByExprs" : [ {
      "ref" : "`dir1`",
      "expr" : "`dir1`"
    } ],
    "aggrExprs" : [ {
      "ref" : "`month_count`",
      "expr" : "count(1) "
    } ]
  }, {
    "pop" : "project",
    "@id" : 4,
    "exprs" : [ {
      "ref" : "`month_no`",
      "expr" : "`dir1`"
    }, {
      "ref" : "`month_count`",
      "expr" : "`month_count`"
    } ],
    "child" : 5,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 115.96499999999999
  }, {
    "pop" : "external-sort",
    "@id" : 3,
    "child" : 4,
    "orderings" : [ {
      "order" : "ASC",
      "expr" : "`month_no`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "reverse" : false,
    "initialAllocation" : 20000000,
    "maxAllocation" : 10000000000,
    "cost" : 115.96499999999999
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 2,
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 115.96499999999999
  }, {
    "pop" : "project",
    "@id" : 1,
    "exprs" : [ {
      "ref" : "`month_no`",
      "expr" : "`month_no`"
    }, {
      "ref" : "`month_count`",
      "expr" : "`month_count`"
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 115.96499999999999
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 115.96499999999999
  } ]
} |
+------------+------------+
1 row selected (0.86 seconds)



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message