hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c..@apache.org
Subject svn commit: r1297675 - in /hive/trunk: hbase-handler/src/java/org/apache/hadoop/hive/hbase/ hbase-handler/src/test/queries/ hbase-handler/src/test/results/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/jav...
Date Tue, 06 Mar 2012 20:10:27 GMT
Author: cws
Date: Tue Mar  6 20:10:26 2012
New Revision: 1297675

URL: http://svn.apache.org/viewvc?rev=1297675&view=rev
Log:
HIVE-2771 [jira] Add support for filter pushdown for key ranges in hbase for
keys of type string
(Ashutosh Chauhan via Carl Steinbach)

Summary:
https://issues.apache.org/jira/browse/HIVE-2771

This patch adds support for pushing key range scans down to HBase for keys of
type string. With this patch, filter pushdowns of the following types are supported:
a) Point lookups for keys of any type.
b) Range scans for keys of type string.

Test Plan:
Added hbase_ppd_key_range.q which is modeled after hbase_pushdown.q

This is a subtask of HIVE-1643

Test Plan: EMPTY

Reviewers: JIRA, jsichi, cwsteinbach

Reviewed By: cwsteinbach

CC: jsichi, ashutoshc

Differential Revision: https://reviews.facebook.net/D1551

Added:
    hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
    hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
Modified:
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
(original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
Tue Mar  6 20:10:26 2012
@@ -43,7 +43,6 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
 import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
 import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -288,10 +287,13 @@ public class HBaseStorageHandler extends
       org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
     List<String> columnNames =
       Arrays.asList(columnNameProperty.split(","));
+
     HBaseSerDe hbaseSerde = (HBaseSerDe) deserializer;
+    String keyColName = columnNames.get(hbaseSerde.getKeyColumnOffset());
+    String keyColType = jobConf.get(org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES).
+        split(",")[hbaseSerde.getKeyColumnOffset()];
     IndexPredicateAnalyzer analyzer =
-      HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
-        columnNames.get(hbaseSerde.getKeyColumnOffset()));
+      HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(keyColName, keyColType);
     List<IndexSearchCondition> searchConditions =
       new ArrayList<IndexSearchCondition>();
     ExprNodeDesc residualPredicate =

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
(original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
Tue Mar  6 20:10:26 2012
@@ -20,20 +20,16 @@ package org.apache.hadoop.hive.hbase;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.BinaryComparator;
-import org.apache.hadoop.hbase.filter.CompareFilter;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.filter.WhileMatchFilter;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
 import org.apache.hadoop.hbase.mapreduce.TableSplit;
@@ -44,7 +40,6 @@ import org.apache.hadoop.hive.ql.exec.Ut
 import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
 import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde.Constants;
@@ -55,7 +50,6 @@ import org.apache.hadoop.hive.serde2.laz
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -64,7 +58,6 @@ import org.apache.hadoop.mapred.Reporter
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 
 /**
@@ -249,12 +242,9 @@ public class HiveHBaseTableInputFormat e
     ExprNodeDesc filterExpr =
       Utilities.deserializeExpression(filterExprSerialized, jobConf);
 
-    String columnNameProperty = jobConf.get(Constants.LIST_COLUMNS);
-    List<String> columnNames =
-      Arrays.asList(columnNameProperty.split(","));
-
-    IndexPredicateAnalyzer analyzer =
-      newIndexPredicateAnalyzer(columnNames.get(iKey));
+    String colName = jobConf.get(Constants.LIST_COLUMNS).split(",")[iKey];
+    String colType = jobConf.get(Constants.LIST_COLUMN_TYPES).split(",")[iKey];
+    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(colName,colType);
 
     List<IndexSearchCondition> searchConditions =
       new ArrayList<IndexSearchCondition>();
@@ -279,7 +269,7 @@ public class HiveHBaseTableInputFormat e
     IndexSearchCondition sc = searchConditions.get(0);
     ExprNodeConstantEvaluator eval =
       new ExprNodeConstantEvaluator(sc.getConstantDesc());
-    byte [] startRow;
+    byte [] row;
     try {
       ObjectInspector objInspector = eval.initialize(null);
       Object writable = eval.evaluate(null);
@@ -291,18 +281,33 @@ public class HiveHBaseTableInputFormat e
         false,
         (byte) 0,
         null);
-      startRow = new byte[serializeStream.getCount()];
+      row = new byte[serializeStream.getCount()];
       System.arraycopy(
         serializeStream.getData(), 0,
-        startRow, 0, serializeStream.getCount());
+        row, 0, serializeStream.getCount());
     } catch (HiveException ex) {
       throw new IOException(ex);
     }
 
-    // stopRow is exclusive, so pad it with a trailing 0 byte to
-    // make it compare as the very next value after startRow
-    byte [] stopRow = new byte[startRow.length + 1];
-    System.arraycopy(startRow, 0, stopRow, 0, startRow.length);
+    byte [] startRow = HConstants.EMPTY_START_ROW, stopRow = HConstants.EMPTY_END_ROW;
+    String comparisonOp = sc.getComparisonOp();
+    if("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)){
+      startRow = row;
+      stopRow = getNextBA(row);
+    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)){
+      stopRow = row;
+    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
+        .equals(comparisonOp)) {
+      startRow = row;
+    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
+        .equals(comparisonOp)){
+      startRow = getNextBA(row);
+    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
+        .equals(comparisonOp)){
+      stopRow = getNextBA(row);
+    } else {
+      throw new IOException(comparisonOp + " is not a supported comparison operator");
+    }
 
     if (tableSplit != null) {
       tableSplit = new TableSplit(
@@ -313,18 +318,17 @@ public class HiveHBaseTableInputFormat e
     }
     scan.setStartRow(startRow);
     scan.setStopRow(stopRow);
-
-    // Add a WhileMatchFilter to make the scan terminate as soon
-    // as we see a non-matching key.  This is probably redundant
-    // since the stopRow above should already take care of it for us.
-    scan.setFilter(
-      new WhileMatchFilter(
-        new RowFilter(
-          CompareFilter.CompareOp.EQUAL,
-          new BinaryComparator(startRow))));
     return tableSplit;
   }
 
+  private byte[] getNextBA(byte[] current){
+    // startRow is inclusive while stopRow is exclusive,
+    //this util method returns very next bytearray which will occur after the current one
+    // by padding current one with a trailing 0 byte.
+    byte[] next = new byte[current.length + 1];
+    System.arraycopy(current, 0, next, 0, current.length);
+    return next;
+  }
   /**
    * Instantiates a new predicate analyzer suitable for
    * determining how to push a filter down into the HBase scan,
@@ -335,13 +339,18 @@ public class HiveHBaseTableInputFormat e
    * @return preconfigured predicate analyzer
    */
   static IndexPredicateAnalyzer newIndexPredicateAnalyzer(
-    String keyColumnName) {
+    String keyColumnName, String keyColType) {
 
     IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
 
-    // for now, we only support equality comparisons
-    analyzer.addComparisonOp(
-      "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
+    analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
+    if(keyColType.equalsIgnoreCase("string")){
+      analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic." +
+        "GenericUDFOPEqualOrGreaterThan");
+      analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan");
+      analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan");
+      analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan");
+    }
 
     // and only on the key column
     analyzer.clearAllowedColumnNames();

Added: hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q?rev=1297675&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q (added)
+++ hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q Tue Mar  6 20:10:26 2012
@@ -0,0 +1,67 @@
+CREATE TABLE hbase_pushdown(key string, value string) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string");
+
+INSERT OVERWRITE TABLE hbase_pushdown 
+SELECT cast(key as string), value
+FROM src;
+
+-- with full pushdown
+explain select * from hbase_pushdown where key>'90';
+
+select * from hbase_pushdown where key>'90';
+select * from hbase_pushdown where key<'1';
+select * from hbase_pushdown where key<='2';
+select * from hbase_pushdown where key>='90';
+
+-- with partial pushdown
+
+explain select * from hbase_pushdown where key>'90' and value like '%9%';
+
+select * from hbase_pushdown where key>'90' and value like '%9%';
+
+-- with two residuals
+
+explain select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int);
+
+select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int);
+
+
+-- with contradictory pushdowns
+
+explain select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%';
+
+select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%';
+
+-- with nothing to push down
+
+explain select * from hbase_pushdown;
+
+-- with a predicate which is not actually part of the filter, so
+-- it should be ignored by pushdown
+
+explain select * from hbase_pushdown
+where (case when key<'90' then 2 else 4 end) > 3;
+
+-- with a predicate which is under an OR, so it should
+-- be ignored by pushdown
+
+explain select * from hbase_pushdown
+where key<='80' or value like '%90%';
+
+-- following will not be pushed into hbase
+explain select * from hbase_pushdown where key > '281' 
+and key < '287';
+
+select * from hbase_pushdown where key > '281' 
+and key < '287';
+
+set hive.optimize.ppd.storage=false;
+
+-- with pushdown disabled
+
+explain select * from hbase_pushdown where key<='90';

Added: hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out?rev=1297675&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out (added)
+++ hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out Tue Mar  6 20:10:26
2012
@@ -0,0 +1,585 @@
+PREHOOK: query: CREATE TABLE hbase_pushdown(key string, value string) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE hbase_pushdown(key string, value string) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: INSERT OVERWRITE TABLE hbase_pushdown 
+SELECT cast(key as string), value
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hbase_pushdown
+POSTHOOK: query: INSERT OVERWRITE TABLE hbase_pushdown 
+SELECT cast(key as string), value
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: -- with full pushdown
+explain select * from hbase_pushdown where key>'90'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with full pushdown
+explain select * from hbase_pushdown where key>'90'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL
key) '90'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            filterExpr:
+                expr: (key > '90')
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (key > '90')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown where key>'90'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key>'90'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+92	val_92
+95	val_95
+96	val_96
+97	val_97
+98	val_98
+PREHOOK: query: select * from hbase_pushdown where key<'1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key<'1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+0	val_0
+PREHOOK: query: select * from hbase_pushdown where key<='2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key<='2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+114	val_114
+116	val_116
+118	val_118
+119	val_119
+12	val_12
+120	val_120
+125	val_125
+126	val_126
+128	val_128
+129	val_129
+131	val_131
+133	val_133
+134	val_134
+136	val_136
+137	val_137
+138	val_138
+143	val_143
+145	val_145
+146	val_146
+149	val_149
+15	val_15
+150	val_150
+152	val_152
+153	val_153
+155	val_155
+156	val_156
+157	val_157
+158	val_158
+160	val_160
+162	val_162
+163	val_163
+164	val_164
+165	val_165
+166	val_166
+167	val_167
+168	val_168
+169	val_169
+17	val_17
+170	val_170
+172	val_172
+174	val_174
+175	val_175
+176	val_176
+177	val_177
+178	val_178
+179	val_179
+18	val_18
+180	val_180
+181	val_181
+183	val_183
+186	val_186
+187	val_187
+189	val_189
+19	val_19
+190	val_190
+191	val_191
+192	val_192
+193	val_193
+194	val_194
+195	val_195
+196	val_196
+197	val_197
+199	val_199
+2	val_2
+PREHOOK: query: select * from hbase_pushdown where key>='90'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key>='90'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+90	val_90
+92	val_92
+95	val_95
+96	val_96
+97	val_97
+98	val_98
+PREHOOK: query: -- with partial pushdown
+
+explain select * from hbase_pushdown where key>'90' and value like '%9%'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with partial pushdown
+
+explain select * from hbase_pushdown where key>'90' and value like '%9%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL
key) '90') (like (TOK_TABLE_OR_COL value) '%9%')))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            filterExpr:
+                expr: (key > '90')
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (value like '%9%')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown where key>'90' and value like '%9%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key>'90' and value like '%9%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+92	val_92
+95	val_95
+96	val_96
+97	val_97
+98	val_98
+PREHOOK: query: -- with two residuals
+
+explain select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with two residuals
+
+explain select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (>=
(TOK_TABLE_OR_COL key) '90') (like (TOK_TABLE_OR_COL value) '%9%')) (= (TOK_TABLE_OR_COL key)
(TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL value)))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            filterExpr:
+                expr: (key >= '90')
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((value like '%9%') and (key = UDFToInteger(value)))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+PREHOOK: query: -- with contradictory pushdowns
+
+explain select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with contradictory pushdowns
+
+explain select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (<
(TOK_TABLE_OR_COL key) '80') (> (TOK_TABLE_OR_COL key) '90')) (like (TOK_TABLE_OR_COL value)
'%90%')))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            Filter Operator
+              predicate:
+                  expr: (((key < '80') and (key > '90')) and (value like '%90%'))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+PREHOOK: query: -- with nothing to push down
+
+explain select * from hbase_pushdown
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with nothing to push down
+
+explain select * from hbase_pushdown
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- with a predicate which is not actually part of the filter, so
+-- it should be ignored by pushdown
+
+explain select * from hbase_pushdown
+where (case when key<'90' then 2 else 4 end) > 3
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with a predicate which is not actually part of the filter, so
+-- it should be ignored by pushdown
+
+explain select * from hbase_pushdown
+where (case when key<'90' then 2 else 4 end) > 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION
when (< (TOK_TABLE_OR_COL key) '90') 2 4) 3))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            Filter Operator
+              predicate:
+                  expr: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- with a predicate which is under an OR, so it should
+-- be ignored by pushdown
+
+explain select * from hbase_pushdown
+where key<='80' or value like '%90%'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with a predicate which is under an OR, so it should
+-- be ignored by pushdown
+
+explain select * from hbase_pushdown
+where key<='80' or value like '%90%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (<= (TOK_TABLE_OR_COL
key) '80') (like (TOK_TABLE_OR_COL value) '%90%')))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            Filter Operator
+              predicate:
+                  expr: ((key <= '80') or (value like '%90%'))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- following will not be pushed into hbase
+explain select * from hbase_pushdown where key > '281' 
+and key < '287'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- following will not be pushed into hbase
+explain select * from hbase_pushdown where key > '281' 
+and key < '287'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL
key) '281') (< (TOK_TABLE_OR_COL key) '287')))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            Filter Operator
+              predicate:
+                  expr: ((key > '281') and (key < '287'))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown where key > '281' 
+and key < '287'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key > '281' 
+and key < '287'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+282	val_282
+283	val_283
+284	val_284
+285	val_285
+286	val_286
+PREHOOK: query: -- with pushdown disabled
+
+explain select * from hbase_pushdown where key<='90'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with pushdown disabled
+
+explain select * from hbase_pushdown where key<='90'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL
key) '90'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_pushdown 
+          TableScan
+            alias: hbase_pushdown
+            Filter Operator
+              predicate:
+                  expr: (key <= '90')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Mar  6 20:10:26
2012
@@ -1916,6 +1916,22 @@ public final class Utilities {
     jobConf.set(Constants.LIST_COLUMNS, columnNamesString);
   }
 
+  public static void setColumnTypeList(JobConf jobConf, Operator op) {
+    RowSchema rowSchema = op.getSchema();
+    if (rowSchema == null) {
+      return;
+    }
+    StringBuilder columnTypes = new StringBuilder();
+    for (ColumnInfo colInfo : rowSchema.getSignature()) {
+      if (columnTypes.length() > 0) {
+        columnTypes.append(",");
+      }
+      columnTypes.append(colInfo.getType().getTypeName());
+    }
+    String columnTypesString = columnTypes.toString();
+    jobConf.set(Constants.LIST_COLUMN_TYPES, columnTypesString);
+  }
+
   public static void validatePartSpec(Table tbl, Map<String, String> partSpec)
       throws SemanticException {
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Mar  6 20:10:26 2012
@@ -346,9 +346,9 @@ public class HiveInputFormat<K extends W
       return;
     }
 
-    // construct column name list for reference by filter push down
+    // construct column name list and types for reference by filter push down
     Utilities.setColumnNameList(jobConf, tableScan);
-
+    Utilities.setColumnTypeList(jobConf, tableScan);
     // push down filters
     ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
     if (filterExpr == null) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Tue Mar  6 20:10:26 2012
@@ -782,6 +782,7 @@ public final class OpProcFactory {
       (HiveStoragePredicateHandler) storageHandler;
     JobConf jobConf = new JobConf(owi.getParseContext().getConf());
     Utilities.setColumnNameList(jobConf, tableScanOp);
+    Utilities.setColumnTypeList(jobConf, tableScanOp);
     Utilities.copyTableJobPropertiesToConf(
       Utilities.getTableDesc(tbl),
       jobConf);



Mime
View raw message