hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1608622 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java test/queries/clientpositive/ppd_join5.q test/results/clientpositive/ppd_join5.q.out
Date Mon, 07 Jul 2014 22:04:35 GMT
Author: hashutosh
Date: Mon Jul  7 22:04:34 2014
New Revision: 1608622

URL: http://svn.apache.org/r1608622
Log:
HIVE-7346 : Wrong results caused by hive ppd under specific join condition (Navis via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/ppd_join5.q
    hive/trunk/ql/src/test/results/clientpositive/ppd_join5.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=1608622&r1=1608621&r2=1608622&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Mon Jul  7 22:04:34 2014
@@ -77,9 +77,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.GenericUDAFRankEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction;
 import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.JobConf;
 
@@ -196,7 +194,7 @@ public final class OpProcFactory {
         return;
       }
       
-      ExprWalkerInfo childInfo = getChildWalkerInfo((Operator<?>) ptfOp, owi);
+      ExprWalkerInfo childInfo = getChildWalkerInfo(ptfOp, owi);
 
       if (childInfo == null) {
         return;
@@ -411,16 +409,18 @@ public final class OpProcFactory {
         Object... nodeOutputs) throws SemanticException {
       LOG.info("Processing for " + nd.getName() + "("
           + ((Operator) nd).getIdentifier() + ")");
+
       OpWalkerInfo owi = (OpWalkerInfo) procCtx;
-      Operator<? extends OperatorDesc> op =
-        (Operator<? extends OperatorDesc>) nd;
-      ExprNodeDesc predicate = (((FilterOperator) nd).getConf()).getPredicate();
-      ExprWalkerInfo ewi = new ExprWalkerInfo();
+      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+
+      // if this filter is generated one, predicates need not to be extracted
+      ExprWalkerInfo ewi = owi.getPrunedPreds(op);
       // Don't push a sampling predicate since createFilter() always creates filter
       // with isSamplePred = false. Also, the filterop with sampling pred is always
       // a child of TableScan, so there is no need to push this predicate.
-      if (!((FilterOperator)op).getConf().getIsSamplingPred()) {
+      if (ewi == null && !((FilterOperator)op).getConf().getIsSamplingPred()) {
         // get pushdown predicates for this operator's predicate
+        ExprNodeDesc predicate = (((FilterOperator) nd).getConf()).getPredicate();
         ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, predicate);
         if (!ewi.isDeterministic()) {
           /* predicate is not deterministic */
@@ -964,6 +964,12 @@ public final class OpProcFactory {
       }
       owi.getCandidateFilterOps().clear();
     }
+    // push down current ppd context to newly added filter
+    ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
+    if (walkerInfo != null) {
+      walkerInfo.getNonFinalCandidates().clear();
+      owi.putPrunedPreds(output, walkerInfo);
+    }
     return output;
   }
 
@@ -1048,7 +1054,7 @@ public final class OpProcFactory {
     tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
     tableScanDesc.setFilterObject(decomposed.pushedPredicateObject);
 
-    return (ExprNodeGenericFuncDesc)decomposed.residualPredicate;
+    return decomposed.residualPredicate;
   }
 
   public static NodeProcessor getFilterProc() {

Added: hive/trunk/ql/src/test/queries/clientpositive/ppd_join5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ppd_join5.q?rev=1608622&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ppd_join5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/ppd_join5.q Mon Jul  7 22:04:34 2014
@@ -0,0 +1,24 @@
+create table t1 (id1 string, id2 string);
+create table t2 (id string, d int);
+
+from src tablesample (1 rows)
+  insert into table t1 select 'a','a'
+  insert into table t2 select 'a',2;
+
+explain
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1;
+
+explain
+select * from (
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+) z where d1 > 1 or d2 > 1;
+
+select * from (
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+) z where d1 > 1 or d2 > 1;

Added: hive/trunk/ql/src/test/results/clientpositive/ppd_join5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ppd_join5.q.out?rev=1608622&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ppd_join5.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/ppd_join5.q.out Mon Jul  7 22:04:34 2014
@@ -0,0 +1,266 @@
+PREHOOK: query: create table t1 (id1 string, id2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table t1 (id1 string, id2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (id string, d int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table t2 (id string, d int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: from src tablesample (1 rows)
+  insert into table t1 select 'a','a'
+  insert into table t2 select 'a',2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from src tablesample (1 rows)
+  insert into table t1 select 'a','a'
+  insert into table t2 select 'a',2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.id1 SIMPLE []
+POSTHOOK: Lineage: t1.id2 SIMPLE []
+POSTHOOK: Lineage: t2.d SIMPLE []
+POSTHOOK: Lineage: t2.id SIMPLE []
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (id is not null and (d <= 1)) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: id (type: string), id (type: string)
+                sort order: ++
+                Map-reduce partition columns: id (type: string), id (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: d (type: int)
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (id1 is not null and id2 is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: id1 (type: string), id2 (type: string)
+                sort order: ++
+                Map-reduce partition columns: id1 (type: string), id2 (type: string)
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
+            1 {VALUE._col0}
+          outputColumnNames: _col0, _col1, _col5
+          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int)
+          TableScan
+            alias: c
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (d <= 1) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: d (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col5}
+            1 {VALUE._col1}
+          outputColumnNames: _col0, _col1, _col5, _col9
+          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int), _col9 (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+select * from (
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+) z where d1 > 1 or d2 > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from (
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+) z where d1 > 1 or d2 > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (id1 is not null and id2 is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: id1 (type: string), id2 (type: string)
+                sort order: ++
+                Map-reduce partition columns: id1 (type: string), id2 (type: string)
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: b
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (id is not null and (d <= 1)) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: id (type: string), id (type: string)
+                sort order: ++
+                Map-reduce partition columns: id (type: string), id (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: d (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
+            1 {VALUE._col0}
+          outputColumnNames: _col0, _col1, _col5
+          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int)
+          TableScan
+            alias: c
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (d <= 1) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: d (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col5}
+            1 {VALUE._col1}
+          outputColumnNames: _col0, _col1, _col5, _col9
+          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: ((_col5 > 1) or (_col9 > 1)) (type: boolean)
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int), _col9 (type: int)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: select * from (
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+) z where d1 > 1 or d2 > 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (
+select a.*,b.d d1,c.d d2 from
+  t1 a join t2 b on (a.id1 = b.id)
+       join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1
+) z where d1 > 1 or d2 > 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####



Mime
View raw message