drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Tomer Shiran (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-1629) Cast issue during query
Date Mon, 03 Nov 2014 05:51:34 GMT

    [ https://issues.apache.org/jira/browse/DRILL-1629?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14194274#comment-14194274
] 

Tomer Shiran commented on DRILL-1629:
-------------------------------------

The plans were being truncated. Here they are again:

{code}
0: jdbc:drill:zk=local> EXPLAIN PLAN FOR SELECT u.user_id, u.name, count(*) reviews FROM
mongo.yelp.users u, dfs.yelp.`yelp_academic_dataset_review.json` r WHERE u.user_id = r.user_id
GROUP BY u.user_id, u.name ORDER BY reviews DESC LIMIT 10;
+------------+------------+
|    text    |    json    |
+------------+------------+
| 00-00    Screen
00-01      Project(user_id=[$0], name=[$1], reviews=[$2])
00-02        SelectionVectorRemover
00-03          Limit(fetch=[10])
00-04            SingleMergeExchange(sort0=[2 DESC])
01-01              SelectionVectorRemover
01-02                TopN(limit=[10])
01-03                  HashToRandomExchange(dist0=[[$2]])
02-01                    HashAgg(group=[{0, 1}], reviews=[COUNT()])
02-02                      Project(user_id=[$0], name=[$1])
02-03                        HashJoin(condition=[=($0, $2)], joinType=[inner])
02-05                          HashToRandomExchange(dist0=[[$0]])
03-01                            Scan(groupscan=[MongoGroupScan [MongoScanSpec=MongoScanSpec
[dbName=yelp, collectionName=users, filters=null], columns=[SchemaPath [`user_id`], SchemaPath
[`name`]]]])
02-04                          HashToRandomExchange(dist0=[[$0]])
04-01                            Project(T9¦¦*=[$0])
04-02                              Project(T9¦¦*=[$0], T9¦¦user_id=[$1])
04-03                                Scan(groupscan=[EasyGroupScan [selectionRoot=/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json,
numFiles=1, columns = [SchemaPath [`*`]]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "mongo-scan",
    "@id" : 196609,
    "mongoScanSpec" : {
      "dbName" : "yelp",
      "collectionName" : "users",
      "filters" : null
    },
    "storage" : {
      "type" : "mongo",
      "connection" : "mongodb://localhost:27017/",
      "enabled" : true
    },
    "columns" : [ "`user_id`", "`name`" ],
    "cost" : 252898.0
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 131077,
    "child" : 196609,
    "expr" : "hash(`user_id`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 252898.0
  }, {
    "pop" : "fs-scan",
    "@id" : 262147,
    "files" : [ "file:/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "file:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "yelp" : {
          "location" : "/Users/tshiran/Development/yelp",
          "writable" : false,
          "storageformat" : "json"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`*`" ],
    "selectionRoot" : "/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json",
    "cost" : 1021244.0
  }, {
    "pop" : "project",
    "@id" : 262146,
    "exprs" : [ {
      "ref" : "`T9¦¦*`",
      "expr" : "`*`"
    } ],
    "child" : 262147,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "project",
    "@id" : 262145,
    "exprs" : [ {
      "ref" : "`T9¦¦*`",
      "expr" : "`T9¦¦*`"
    } ],
    "child" : 262146,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 131076,
    "child" : 262145,
    "expr" : "hash(`T9¦¦*`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "hash-join",
    "@id" : 131075,
    "left" : 131077,
    "right" : 131076,
    "conditions" : [ {
      "relationship" : "==",
      "left" : "`user_id`",
      "right" : "`T9¦¦*`"
    } ],
    "joinType" : "INNER",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "project",
    "@id" : 131074,
    "exprs" : [ {
      "ref" : "`user_id`",
      "expr" : "`user_id`"
    }, {
      "ref" : "`name`",
      "expr" : "`name`"
    } ],
    "child" : 131075,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "hash-aggregate",
    "@id" : 131073,
    "child" : 131074,
    "cardinality" : 1.0,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 765933.0,
    "groupByExprs" : [ {
      "ref" : "`user_id`",
      "expr" : "`user_id`"
    }, {
      "ref" : "`name`",
      "expr" : "`name`"
    } ],
    "aggrExprs" : [ {
      "ref" : "`reviews`",
      "expr" : "count(1) "
    } ]
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65539,
    "child" : 131073,
    "expr" : "hash(`reviews`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "top-n",
    "@id" : 65538,
    "child" : 65539,
    "orderings" : [ {
      "order" : "DESC",
      "expr" : "`reviews`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "reverse" : false,
    "limit" : 10,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 65537,
    "child" : 65538,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "single-merge-exchange",
    "@id" : 4,
    "child" : 65537,
    "orderings" : [ {
      "order" : "DESC",
      "expr" : "`reviews`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "limit",
    "@id" : 3,
    "child" : 4,
    "first" : 0,
    "last" : 10,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 2,
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "project",
    "@id" : 1,
    "exprs" : [ {
      "ref" : "`user_id`",
      "expr" : "`user_id`"
    }, {
      "ref" : "`name`",
      "expr" : "`name`"
    }, {
      "ref" : "`reviews`",
      "expr" : "`reviews`"
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  } ]
} |
+------------+------------+
1 row selected (0.342 seconds)

0: jdbc:drill:zk=local> EXPLAIN PLAN FOR SELECT u.user_id, u.name, count(*) reviews FROM
dfs.yelp.`yelp_academic_dataset_user.json` u, dfs.yelp.`yelp_academic_dataset_review.json`
r WHERE u.user_id = r.user_id GROUP BY u.user_id, u.name ORDER BY reviews DESC LIMIT 10;
+------------+------------+
|    text    |    json    |
+------------+------------+
| 00-00    Screen
00-01      Project(user_id=[$0], name=[$1], reviews=[$2])
00-02        SelectionVectorRemover
00-03          Limit(fetch=[10])
00-04            SingleMergeExchange(sort0=[2 DESC])
01-01              SelectionVectorRemover
01-02                TopN(limit=[10])
01-03                  HashToRandomExchange(dist0=[[$2]])
02-01                    HashAgg(group=[{0, 1}], reviews=[COUNT()])
02-02                      Project(user_id=[$0], name=[$1])
02-03                        HashJoin(condition=[=($0, $2)], joinType=[inner])
02-05                          HashToRandomExchange(dist0=[[$0]])
03-01                            Scan(groupscan=[EasyGroupScan [selectionRoot=/Users/tshiran/Development/yelp/yelp_academic_dataset_user.json,
numFiles=1, columns = [SchemaPath [`user_id`], SchemaPath [`name`]]]])
02-04                          Project(user_id0=[$0])
02-06                            HashToRandomExchange(dist0=[[$0]])
04-01                              Scan(groupscan=[EasyGroupScan [selectionRoot=/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json,
numFiles=1, columns = [SchemaPath [`user_id`]]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "fs-scan",
    "@id" : 262145,
    "files" : [ "file:/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "file:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "yelp" : {
          "location" : "/Users/tshiran/Development/yelp",
          "writable" : false,
          "storageformat" : "json"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`user_id`" ],
    "selectionRoot" : "/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json",
    "cost" : 1021244.0
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 131078,
    "child" : 262145,
    "expr" : "hash(`user_id`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "project",
    "@id" : 131076,
    "exprs" : [ {
      "ref" : "`user_id0`",
      "expr" : "`user_id`"
    } ],
    "child" : 131078,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "fs-scan",
    "@id" : 196609,
    "files" : [ "file:/Users/tshiran/Development/yelp/yelp_academic_dataset_user.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "file:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "yelp" : {
          "location" : "/Users/tshiran/Development/yelp",
          "writable" : false,
          "storageformat" : "json"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`user_id`", "`name`" ],
    "selectionRoot" : "/Users/tshiran/Development/yelp/yelp_academic_dataset_user.json",
    "cost" : 115995.0
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 131077,
    "child" : 196609,
    "expr" : "hash(`user_id`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 115995.0
  }, {
    "pop" : "hash-join",
    "@id" : 131075,
    "left" : 131077,
    "right" : 131076,
    "conditions" : [ {
      "relationship" : "==",
      "left" : "`user_id`",
      "right" : "`user_id0`"
    } ],
    "joinType" : "INNER",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "project",
    "@id" : 131074,
    "exprs" : [ {
      "ref" : "`user_id`",
      "expr" : "`user_id`"
    }, {
      "ref" : "`name`",
      "expr" : "`name`"
    } ],
    "child" : 131075,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1021244.0
  }, {
    "pop" : "hash-aggregate",
    "@id" : 131073,
    "child" : 131074,
    "cardinality" : 1.0,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 765933.0,
    "groupByExprs" : [ {
      "ref" : "`user_id`",
      "expr" : "`user_id`"
    }, {
      "ref" : "`name`",
      "expr" : "`name`"
    } ],
    "aggrExprs" : [ {
      "ref" : "`reviews`",
      "expr" : "count(1) "
    } ]
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65539,
    "child" : 131073,
    "expr" : "hash(`reviews`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "top-n",
    "@id" : 65538,
    "child" : 65539,
    "orderings" : [ {
      "order" : "DESC",
      "expr" : "`reviews`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "reverse" : false,
    "limit" : 10,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 65537,
    "child" : 65538,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "single-merge-exchange",
    "@id" : 4,
    "child" : 65537,
    "orderings" : [ {
      "order" : "DESC",
      "expr" : "`reviews`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "limit",
    "@id" : 3,
    "child" : 4,
    "first" : 0,
    "last" : 10,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 2,
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "project",
    "@id" : 1,
    "exprs" : [ {
      "ref" : "`user_id`",
      "expr" : "`user_id`"
    }, {
      "ref" : "`name`",
      "expr" : "`name`"
    }, {
      "ref" : "`reviews`",
      "expr" : "`reviews`"
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 102124.4
  } ]
} |
+------------+------------+
1 row selected (0.26 seconds)
0: jdbc:drill:zk=local> 
{code}

> Cast issue during query
> -----------------------
>
>                 Key: DRILL-1629
>                 URL: https://issues.apache.org/jira/browse/DRILL-1629
>             Project: Apache Drill
>          Issue Type: Bug
>            Reporter: Tomer Shiran
>
> Imported Yelp user.json file into a local MongoDB instance (via mongoimport). Ran the
following query to find the names of the users who had the most reviews:
> {code}
> 0: jdbc:drill:zk=local> SELECT u.user_id, u.name, count(*) reviews FROM mongo.yelp.users
u, dfs.yelp.`yelp_academic_dataset_review.json` r WHERE u.user_id = r.user_id GROUP BY u.user_id,
u.name ORDER BY reviews DESC LIMIT 10;
> Query failed: Failure while running fragment. Failure finding function that runtime code
generation expected.  Signature: compare_to( VARCHAR:OPTIONALINT:OPTIONAL,  ) returns INT:REQUIRED
[c796f1fb-b73a-49e8-8b4a-ed973d89b7d3]
> Error: exception while executing query: Failure while trying to get next result batch.
(state=,code=0)
> {code}
> Here's the explain plan:
> {code}
> 0: jdbc:drill:zk=local> EXPLAIN PLAN FOR SELECT u.user_id, u.name, count(*) reviews
FROM mongo.yelp.users u, dfs.yelp.`yelp_academic_dataset_review.json` r WHERE u.user_id =
r.user_id GROUP BY u.user_id, u.name ORDER BY reviews DESC LIMIT 10;
> +------------+------------+
> |    text    |    json    |
> +------------+------------+
> | 00-00    Screen
> 00-01      Project(user_id=[$0], name=[$1], reviews=[$2])
> 00-02        SelectionVectorRemover
> 00-03          Limit(fetch=[10])
> 00-04            SingleMergeExchange(sort0=[2 DESC])
> 01-01              SelectionVectorRemover
> 01-02                TopN(limit=[10])
> 01-03                  HashToRandomExchange(dist0=[[$2]])
> 02-01                    HashAgg(group=[{0, 1}], reviews=[COUNT()])
> 02-02                      Project(user_id=[$0], name=[$1])
> 02-03                        HashJoin(condition=[=($0, $2)], joinType=[inner])
> 02-05                          HashToRandomExchange(dist0=[[$0]])
> 03-01                            Scan(groupscan=[MongoGroupScan [MongoScanSpec=MongoScanSpec
[dbName=yelp, collectionName=users, filters=null], columns=[SchemaPath [`user_id`], SchemaPath
[`name`]]]])
> 02-04                          HashToRandomExchange(dist0=[[$0]])
> 04-01                            Project(T8¦¦*=[$0])
> 04-02                              Project(T8¦¦*=[$0], T8¦¦user_id=[$ |
> +------------+------------+
> 1 row selected (0.42 seconds)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message