drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Anton Fernando (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-4553) Joins using views are not returning results.
Date Wed, 30 Mar 2016 02:45:25 GMT

    [ https://issues.apache.org/jira/browse/DRILL-4553?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15217288#comment-15217288
] 

Anton Fernando commented on DRILL-4553:
---------------------------------------

This is over JSON and CSV, in this scenario the security metadata is in csv and the first
view is created over it.  Views 2 and 3 are used to secure data in JSON with the security
metadata in csv.  We are currently evaluating Drill to see if it is a good fit to analyze
healthcare data and we have run into this issue.  The explain plan for the query that is not
returning data is as follows:

0: jdbc:drill:zk=localhost:2181> explain plan for select a.facilityidentifier, a.encounteridentifier
from dischargedetail a, dischargephysn b where a.encounteridentifier=b.encounteridentifier
and a.facilityidentifier=b.facilityidentifier;
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(facilityidentifier=[$0], encounteridentifier=[$1])
00-02        Project(facilityidentifier=[$1], encounteridentifier=[$0])
00-03          Project(EncounterIdentifier=[$2], FacilityIdentifier=[$3], EncounterIdentifier0=[$0],
FacilityIdentifier0=[$1])
00-04            HashJoin(condition=[AND(=($2, $0), =($3, $1))], joinType=[inner])
00-06              Project(EncounterIdentifier=[$0], FacilityIdentifier=[$1])
00-08                HashJoin(condition=[AND(=($1, $13), =($2, $14))], joinType=[inner])
00-11                  Project(EncounterIdentifier=[$0], FacilityIdentifier=[$1], SettingOfCare=[$2],
ITEM=[ITEM($3, 'MedicalProfessionalIdentifierRaw')], ITEM4=[ITEM($3, 'MedicalProfessionalRoleCodeRaw')],
ITEM5=[ITEM($3, 'MedicalProfessionalRoleCode')], ITEM6=[ITEM($3, 'FirstNameRaw')], ITEM7=[ITEM($3,
'LastNameRaw')], ITEM8=[ITEM($3, 'MiddleNameRaw')], ITEM9=[ITEM($3, 'MedicalProfessionalPrimarySpecialtyRaw')],
ITEM10=[ITEM($3, 'MedicalProfessionalSecondarySpecialtyRaw')], ITEM11=[ITEM($3, 'NationalProviderIdentifierRaw')],
ITEM12=[ITEM($3, 'UniformProviderIdentifierRaw')])
00-14                    Flatten(flattenField=[$3])
00-17                      Project(EncounterIdentifier=[$0], FacilityIdentifier=[ITEM($1,
'FacilityIdentifier')], SettingOfCare=[$2], MedicalProfessionals=[$3])
00-21                        Scan(groupscan=[EasyGroupScan [selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/json,
numFiles=3, columns=[`EncounterIdentifier`, `Facility`.`FacilityIdentifier`, `SettingOfCare`,
`MedicalProfessionals`], files=[hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json,
hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json, hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json]]])
00-10                  Project(FacilityIdentifier0=[$0], SettingOfCare0=[$1])
00-13                    Project(FacilityIdentifier=[$1], SettingOfCare=[$2])
00-16                      SelectionVectorRemover
00-20                        Filter(condition=[=($0, UPPER(USER))])
00-24                          Project(username=[ITEM($0, 0)], FacilityIdentifier=[ITEM($0,
1)], SettingOfCare=[ITEM($0, 2)])
00-26                            Scan(groupscan=[EasyGroupScan [selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/security,
numFiles=1, columns=[`columns`[0], `columns`[1], `columns`[2]], files=[hdfs://sandbox.hortonworks.com:8020/tmp/security/lake_data_security.csv]]])
00-05              Project(EncounterIdentifier0=[$0], FacilityIdentifier0=[$1])
00-07                Project(EncounterIdentifier=[$1], FacilityIdentifier=[$2])
00-09                  SelectionVectorRemover
00-12                    Filter(condition=[=($2, $3)])
00-15                      HashJoin(condition=[=($0, $4)], joinType=[inner])
00-19                        Project(SettingOfCare=[$0], EncounterIdentifier=[$1], ITEM=[ITEM($2,
'FacilityIdentifier')])
00-23                          Scan(groupscan=[EasyGroupScan [selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/json,
numFiles=3, columns=[`SettingOfCare`, `EncounterIdentifier`, `Facility`.`FacilityIdentifier`],
files=[hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json, hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json,
hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json]]])
00-18                        SelectionVectorRemover
00-22                          Filter(condition=[=($0, UPPER(USER))])
00-25                            Project(username=[ITEM($0, 0)], FacilityIdentifier=[ITEM($0,
1)])
00-27                              Scan(groupscan=[EasyGroupScan [selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/security,
numFiles=1, columns=[`columns`[0], `columns`[1]], files=[hdfs://sandbox.hortonworks.com:8020/tmp/security/lake_data_security.csv]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "fs-scan",
    "@id" : 21,
    "userName" : "root",
    "files" : [ "hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json", "hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json",
"hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "hdfs://sandbox.hortonworks.com:8020",
      "config" : null,
      "workspaces" : {
        "discharges" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : "avro"
        }
      },
      "formats" : {
        "avro" : {
          "type" : "avro"
        },
        "json" : {
          "type" : "json",
          "extensions" : [ "json" ]
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        }
      }
    },
    "format" : {
      "type" : "json",
      "extensions" : [ "json" ]
    },
    "columns" : [ "`EncounterIdentifier`", "`Facility`.`FacilityIdentifier`", "`SettingOfCare`",
"`MedicalProfessionals`" ],
    "selectionRoot" : "hdfs://sandbox.hortonworks.com:8020/tmp/json",
    "cost" : 2927.0
  }, {
    "pop" : "project",
    "@id" : 17,
    "exprs" : [ {
      "ref" : "`EncounterIdentifier`",
      "expr" : "`EncounterIdentifier`"
    }, {
      "ref" : "`FacilityIdentifier`",
      "expr" : "`Facility`.`FacilityIdentifier`"
    }, {
      "ref" : "`SettingOfCare`",
      "expr" : "`SettingOfCare`"
    }, {
      "ref" : "`MedicalProfessionals`",
      "expr" : "`MedicalProfessionals`"
    } ],
    "child" : 21,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 2927.0
  }, {
    "pop" : "flatten",
    "@id" : 14,
    "child" : 17,
    "column" : "`MedicalProfessionals`",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 2927.0
  }, {
    "pop" : "project",
    "@id" : 11,
    "exprs" : [ {
      "ref" : "`EncounterIdentifier`",
      "expr" : "`EncounterIdentifier`"
    }, {
      "ref" : "`FacilityIdentifier`",
      "expr" : "`FacilityIdentifier`"
    }, {
      "ref" : "`SettingOfCare`",
      "expr" : "`SettingOfCare`"
    }, {
      "ref" : "`ITEM`",
      "expr" : "`MedicalProfessionals`.`MedicalProfessionalIdentifierRaw`"
    }, {
      "ref" : "`ITEM4`",
      "expr" : "`MedicalProfessionals`.`MedicalProfessionalRoleCodeRaw`"
    }, {
      "ref" : "`ITEM5`",
      "expr" : "`MedicalProfessionals`.`MedicalProfessionalRoleCode`"
    }, {
      "ref" : "`ITEM6`",
      "expr" : "`MedicalProfessionals`.`FirstNameRaw`"
    }, {
      "ref" : "`ITEM7`",
      "expr" : "`MedicalProfessionals`.`LastNameRaw`"
    }, {
      "ref" : "`ITEM8`",
      "expr" : "`MedicalProfessionals`.`MiddleNameRaw`"
    }, {
      "ref" : "`ITEM9`",
      "expr" : "`MedicalProfessionals`.`MedicalProfessionalPrimarySpecialtyRaw`"
    }, {
      "ref" : "`ITEM10`",
      "expr" : "`MedicalProfessionals`.`MedicalProfessionalSecondarySpecialtyRaw`"
    }, {
      "ref" : "`ITEM11`",
      "expr" : "`MedicalProfessionals`.`NationalProviderIdentifierRaw`"
    }, {
      "ref" : "`ITEM12`",
      "expr" : "`MedicalProfessionals`.`UniformProviderIdentifierRaw`"
    } ],
    "child" : 14,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 2927.0
  }, {
    "pop" : "fs-scan",
    "@id" : 23,
    "userName" : "root",
    "files" : [ "hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json", "hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json",
"hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "hdfs://sandbox.hortonworks.com:8020",
      "config" : null,
      "workspaces" : {
        "discharges" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : "avro"
        }
      },
      "formats" : {
        "avro" : {
          "type" : "avro"
        },
        "json" : {
          "type" : "json",
          "extensions" : [ "json" ]
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        }
      }
    },
    "format" : {
      "type" : "json",
      "extensions" : [ "json" ]
    },
    "columns" : [ "`SettingOfCare`", "`EncounterIdentifier`", "`Facility`.`FacilityIdentifier`"
],
    "selectionRoot" : "hdfs://sandbox.hortonworks.com:8020/tmp/json",
    "cost" : 2927.0
  }, {
    "pop" : "project",
    "@id" : 19,
    "exprs" : [ {
      "ref" : "`SettingOfCare`",
      "expr" : "`SettingOfCare`"
    }, {
      "ref" : "`EncounterIdentifier`",
      "expr" : "`EncounterIdentifier`"
    }, {
      "ref" : "`ITEM`",
      "expr" : "`Facility`.`FacilityIdentifier`"
    } ],
    "child" : 23,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 2927.0
  }, {
    "pop" : "fs-scan",
    "@id" : 27,
    "userName" : "root",
    "files" : [ "hdfs://sandbox.hortonworks.com:8020/tmp/security/lake_data_security.csv"
],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "hdfs://sandbox.hortonworks.com:8020",
      "config" : null,
      "workspaces" : {
        "discharges" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : "avro"
        }
      },
      "formats" : {
        "avro" : {
          "type" : "avro"
        },
        "json" : {
          "type" : "json",
          "extensions" : [ "json" ]
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        }
      }
    },
    "format" : {
      "type" : "text",
      "extensions" : [ "csv" ],
      "delimiter" : ","
    },
    "columns" : [ "`columns`[0]", "`columns`[1]" ],
    "selectionRoot" : "hdfs://sandbox.hortonworks.co |
+------+------+
1 row selected (3.622 seconds)


> Joins using views are not returning results.
> --------------------------------------------
>
>                 Key: DRILL-4553
>                 URL: https://issues.apache.org/jira/browse/DRILL-4553
>             Project: Apache Drill
>          Issue Type: Bug
>    Affects Versions: 1.5.0, 1.6.0
>            Reporter: Anton Fernando
>            Priority: Critical
>
> I have the following three views:
> create view view1 as select ..... from .... where username=user;
> create view view2 as select ..... from view1 as a, <file1> as b where a.col1 =
b.col1;
> create view view3 as select ..... from view1 as a, <file2> as b where a.col1 =
b.col1;
> A select * from each of these views works fine and returns the expected results.  A self
join on view2 and view3 also works fine.  However when view2 and view3 are joined on common
keys there are no rows returned.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message