hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sze...@apache.org
Subject svn commit: r1674378 - in /hive/trunk: ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ ql/src/test/results/clientpositive/spark/ ql/src/test/results/clientpositive/tez/ serde/src/java...
Date Fri, 17 Apr 2015 18:40:50 GMT
Author: szehon
Date: Fri Apr 17 18:40:49 2015
New Revision: 1674378

URL: http://svn.apache.org/r1674378
Log:
HIVE-9580 : Server returns incorrect result from JOIN ON VARCHAR columns (Aihua Xu via Szehon)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/join_on_varchar.q
    hive/trunk/ql/src/test/results/clientpositive/join_on_varchar.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Apr 17 18:40:49 2015
@@ -7707,6 +7707,10 @@ public class SemanticAnalyzer extends Ba
                 keys[i][k].getTypeInfo(), commonType)) {
           keys[i][k] = ParseUtils.createConversionCast(
                   keys[i][k], (PrimitiveTypeInfo)commonType);
+        } else {
+          // For the case no implicit type conversion, e.g., varchar(5) and varchar(10),
+          // pick the common type for all the keys since during run-time, same key type is assumed.
+          keys[i][k].setTypeInfo(commonType);
         }
       }
     }

Added: hive/trunk/ql/src/test/queries/clientpositive/join_on_varchar.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/join_on_varchar.q?rev=1674378&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/join_on_varchar.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/join_on_varchar.q Fri Apr 17 18:40:49 2015
@@ -0,0 +1,12 @@
+-- SORT_QUERY_RESULTS
+
+create table tbl1(c1 varchar(10), intcol int);
+create table tbl2(c2 varchar(30));
+insert into table tbl1 select repeat('t', 10), 11 from src limit 1;
+insert into table tbl1 select repeat('s', 10), 22 from src limit 1;
+insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1;
+insert into table tbl2 select repeat('s', 10) from src limit 1;
+
+explain
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2;
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2;

Added: hive/trunk/ql/src/test/results/clientpositive/join_on_varchar.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/join_on_varchar.q.out?rev=1674378&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/join_on_varchar.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/join_on_varchar.q.out Fri Apr 17 18:40:49 2015
@@ -0,0 +1,146 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table tbl1(c1 varchar(10), intcol int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table tbl1(c1 varchar(10), intcol int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: create table tbl2(c2 varchar(30))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: create table tbl2(c2 varchar(30))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert into table tbl1 select repeat('t', 10), 11 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert into table tbl1 select repeat('t', 10), 11 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.c1 EXPRESSION []
+POSTHOOK: Lineage: tbl1.intcol SIMPLE []
+PREHOOK: query: insert into table tbl1 select repeat('s', 10), 22 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert into table tbl1 select repeat('s', 10), 22 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.c1 EXPRESSION []
+POSTHOOK: Lineage: tbl1.intcol SIMPLE []
+PREHOOK: query: insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.c2 EXPRESSION []
+PREHOOK: query: insert into table tbl2 select repeat('s', 10) from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert into table tbl2 select repeat('s', 10) from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.c2 EXPRESSION []
+PREHOOK: query: explain
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        tbl2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        tbl2 
+          TableScan
+            alias: tbl2
+            Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c2 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 c1 (type: varchar(30))
+                  1 c2 (type: varchar(30))
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl1
+            Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 c1 (type: varchar(30))
+                  1 c2 (type: varchar(30))
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: varchar(10)), _col5 (type: varchar(30))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: varchar(10)), _col1 (type: varchar(30))
+                    sort order: ++
+                    Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: varchar(10)), KEY.reducesinkkey1 (type: varchar(30))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+ssssssssss	ssssssssss

Modified: hive/trunk/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out Fri Apr 17 18:40:49 2015
@@ -102,8 +102,8 @@ STAGE PLANS:
                     Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       keys:
-                        0 dec (type: decimal(4,2))
-                        1 dec (type: decimal(4,0))
+                        0 dec (type: decimal(6,2))
+                        1 dec (type: decimal(6,2))
             Local Work:
               Map Reduce Local Work
 
@@ -123,8 +123,8 @@ STAGE PLANS:
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 dec (type: decimal(4,2))
-                        1 dec (type: decimal(4,0))
+                        0 dec (type: decimal(6,2))
+                        1 dec (type: decimal(6,2))
                       outputColumnNames: _col0, _col4
                       input vertices:
                         1 Map 2

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out Fri Apr 17 18:40:49 2015
@@ -105,8 +105,8 @@ STAGE PLANS:
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 dec (type: decimal(4,2))
-                        1 dec (type: decimal(4,0))
+                        0 dec (type: decimal(6,2))
+                        1 dec (type: decimal(6,2))
                       outputColumnNames: _col0, _col4
                       input vertices:
                         1 Map 2
@@ -131,10 +131,11 @@ STAGE PLANS:
                     predicate: dec is not null (type: boolean)
                     Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: dec (type: decimal(4,0))
+                      key expressions: dec (type: decimal(6,2))
                       sort order: +
-                      Map-reduce partition columns: dec (type: decimal(4,0))
+                      Map-reduce partition columns: dec (type: decimal(6,2))
                       Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: dec (type: decimal(4,0))
 
   Stage: Stage-0
     Fetch Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out Fri Apr 17 18:40:49 2015
@@ -240,11 +240,11 @@ STAGE PLANS:
                     predicate: c2 is not null (type: boolean)
                     Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: c2 (type: char(10))
+                      key expressions: c2 (type: char(20))
                       sort order: +
-                      Map-reduce partition columns: c2 (type: char(10))
+                      Map-reduce partition columns: c2 (type: char(20))
                       Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: c1 (type: int)
+                      value expressions: c1 (type: int), c2 (type: char(10))
             Execution mode: vectorized
         Map 2 
             Map Operator Tree:
@@ -258,7 +258,7 @@ STAGE PLANS:
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 c2 (type: char(10))
+                        0 c2 (type: char(20))
                         1 c2 (type: char(20))
                       outputColumnNames: _col0, _col1, _col5, _col6
                       input vertices:

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out Fri Apr 17 18:40:49 2015
@@ -241,7 +241,7 @@ STAGE PLANS:
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 c2 (type: varchar(10))
+                        0 c2 (type: varchar(20))
                         1 c2 (type: varchar(20))
                       outputColumnNames: _col0, _col1, _col5, _col6
                       input vertices:

Modified: hive/trunk/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out Fri Apr 17 18:40:49 2015
@@ -238,7 +238,7 @@ STAGE PLANS:
               Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
-                  0 c2 (type: char(10))
+                  0 c2 (type: char(20))
                   1 c2 (type: char(20))
 
   Stage: Stage-2
@@ -254,7 +254,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 c2 (type: char(10))
+                  0 c2 (type: char(20))
                   1 c2 (type: char(20))
                 outputColumnNames: _col0, _col1, _col5, _col6
                 Statistics: Num rows: 2 Data size: 215 Basic stats: COMPLETE Column stats: NONE

Modified: hive/trunk/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out Fri Apr 17 18:40:49 2015
@@ -236,7 +236,7 @@ STAGE PLANS:
               Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
-                  0 c2 (type: varchar(10))
+                  0 c2 (type: varchar(20))
                   1 c2 (type: varchar(20))
 
   Stage: Stage-2
@@ -252,7 +252,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 c2 (type: varchar(10))
+                  0 c2 (type: varchar(20))
                   1 c2 (type: varchar(20))
                 outputColumnNames: _col0, _col1, _col5, _col6
                 Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java?rev=1674378&r1=1674377&r2=1674378&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java Fri Apr 17 18:40:49 2015
@@ -768,12 +768,14 @@ public final class TypeInfoUtils {
    * @return
    */
   public static boolean isConversionRequiredForComparison(TypeInfo typeA, TypeInfo typeB) {
-    if (typeA == typeB) {
+    if (typeA.equals(typeB)) {
       return false;
     }
-    if (TypeInfoUtils.doPrimitiveCategoriesMatch(typeA,  typeB)) {
+
+    if (TypeInfoUtils.doPrimitiveCategoriesMatch(typeA, typeB)) {
       return false;
     }
+
     return true;
   }
 



Mime
View raw message