drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Khurram Faraaz (JIRA)" <j...@apache.org>
Subject [jira] [Created] (DRILL-3891) ROW_KEY filter IN(integer values) does not get pushed in to Scan
Date Sat, 03 Oct 2015 01:39:26 GMT
Khurram Faraaz created DRILL-3891:
-------------------------------------

             Summary: ROW_KEY filter IN(integer values) does not get pushed in to Scan
                 Key: DRILL-3891
                 URL: https://issues.apache.org/jira/browse/DRILL-3891
             Project: Apache Drill
          Issue Type: Bug
          Components: Execution - Flow
    Affects Versions: 1.2.0
         Environment: 4 node cluster CentOS
            Reporter: Khurram Faraaz
            Assignee: Smidth Panchamia


ROW_KEY filter does not get pushed into Scan when filter involved IN (integer values). Data
inserted into HBase table is byte ordered and encoded as Int32.

case 1) NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as
rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB')
NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), 8388607), =(CONVERT_FROM($0,
'INT_OB'), 2147483647), =(CONVERT_FROM($0, 'INT_OB'), 67108863), =(CONVERT_FROM($0, 'INT_OB'),
-536870912), =(CONVERT_FROM($0, 'INT_OB'), -2147483648)))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl,
startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 2) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as
rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB')
NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), '8388607'), =(CONVERT_FROM($0,
'INT_OB'), '2147483647'), =(CONVERT_FROM($0, 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'),
'-536870912'), =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl,
startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 3) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as
rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB')
NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), '8388607'), =(CONVERT_FROM($0,
'INT_OB'), '2147483647'), =(CONVERT_FROM($0, 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'),
'-536870912'), =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl,
startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 4) NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as
int),cast('-2147483648' as int));

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as
rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB')
NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as
int),cast('-2147483648' as int));
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), CAST('8388607'):INTEGER
NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('2147483647'):INTEGER NOT NULL), =(CONVERT_FROM($0,
'INT_OB'), CAST('67108863'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-536870912'):INTEGER
NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-2147483648'):INTEGER NOT NULL)))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl,
startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

Data inserted into HBase table

{code}
int[] arr = {Integer.MIN_VALUE,Integer.MIN_VALUE/4,Integer.MIN_VALUE/8,Integer.MIN_VALUE/16,Integer.MIN_VALUE/32,Integer.MIN_VALUE/64,Integer.MIN_VALUE/128,Integer.MAX_VALUE,Integer.MAX_VALUE/4,Integer.MAX_VALUE/8,Integer.MAX_VALUE/16,Integer.MAX_VALUE/32,Integer.MAX_VALUE/64,Integer.MAX_VALUE/128,Integer.MAX_VALUE/256,Integer.MAX_VALUE};

        for (int i = 0; i < arr.length; i++) {
            byte[] bytes = new byte[5];
            org.apache.hadoop.hbase.util.PositionedByteRange br =
                new org.apache.hadoop.hbase.util.SimplePositionedByteRange(bytes, 0, 5);
            org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, arr[i],
                org.apache.hadoop.hbase.util.Order.ASCENDING);

            Put p = new Put(bytes);
            p.add(Bytes.toBytes("colfam1"),Bytes.toBytes("qual1"),String.format("value %d",
i).getBytes());
            table.put(p);
        }
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message