hive-commits mailing list archives

From prasan...@apache.org
Subject hive git commit: HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong)
Date Thu, 17 Dec 2015 19:39:21 GMT
Repository: hive
Updated Branches:
  refs/heads/master 0f1c112fc -> cac5804de


HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cac5804d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cac5804d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cac5804d

Branch: refs/heads/master
Commit: cac5804de034ad54821e0524091cff0f4a97476b
Parents: 0f1c112
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Thu Dec 17 13:38:57 2015 -0600
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Thu Dec 17 13:38:57 2015 -0600

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  20 ++--
 .../test/queries/clientpositive/decimal_stats.q |  16 +++
 .../results/clientpositive/decimal_stats.q.out  | 106 +++++++++++++++++++
 3 files changed, 135 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 149cbc1..2f78fe8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -709,13 +709,19 @@ public class StatsUtils {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
       cs.setCountDistint(csd.getDecimalStats().getNumDVs());
       cs.setNumNulls(csd.getDecimalStats().getNumNulls());
-      Decimal val = csd.getDecimalStats().getHighValue();
-      BigDecimal maxVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      val = csd.getDecimalStats().getLowValue();
-      BigDecimal minVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      cs.setRange(minVal, maxVal);
+      Decimal highValue = csd.getDecimalStats().getHighValue();
+      Decimal lowValue = csd.getDecimalStats().getLowValue();
+      if (highValue != null && highValue.getUnscaled() != null
+          && lowValue != null && lowValue.getUnscaled() != null) {
+        HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
+        BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
+        HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
+        BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
+
+        if (minVal != null && maxVal != null) {
+          cs.setRange(minVal, maxVal);
+        }
+      }
     } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
     } else {
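
Side note (not part of the commit): when every value in a decimal column is NULL, the high/low Decimal statistics returned by the metastore, or their unscaled byte arrays, can come back as null, and the removed lines passed them straight into BigInteger's constructor, which is where the NPE in the subject would surface. A minimal, standalone sketch of the same guard pattern, using only java.math types and hypothetical highUnscaled/lowUnscaled fields in place of Hive's metastore Decimal objects:

import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalRangeGuard {
  // Hypothetical stand-ins for the unscaled bytes and scale carried by the
  // metastore's Decimal high/low column statistics; for an all-NULL column
  // these may be null.
  static byte[] highUnscaled = null;
  static int highScale = 0;
  static byte[] lowUnscaled = null;
  static int lowScale = 0;

  public static void main(String[] args) {
    // Unguarded, new BigInteger((byte[]) null) throws NullPointerException.
    // Guarded, mirroring the patch: only derive a min/max range when both
    // endpoints are actually present.
    if (highUnscaled != null && lowUnscaled != null) {
      BigDecimal maxVal = new BigDecimal(new BigInteger(highUnscaled), highScale);
      BigDecimal minVal = new BigDecimal(new BigInteger(lowUnscaled), lowScale);
      System.out.println("range: [" + minVal + ", " + maxVal + "]");
    } else {
      System.out.println("no range set; the column statistics have no non-NULL values");
    }
  }
}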

http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/test/queries/clientpositive/decimal_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
new file mode 100644
index 0000000..2370e7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/decimal_stats.q
@@ -0,0 +1,16 @@
+set hive.stats.fetch.column.stats=true;
+drop table if exists decimal_1;
+
+create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
+
+desc decimal_1;
+
+insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
+
+analyze table decimal_1 compute statistics for columns;
+
+desc formatted decimal_1 v;
+
+explain select * from decimal_1 order by 1 limit 100;
+drop table decimal_1;
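
For reference (not part of the commit): with hive.stats.fetch.column.stats=true, the insert above writes a NULL into column v for every row selected from src, so the computed column statistics for v carry only a null count and no high/low values; the explain at the end then exercises the stats-annotation path that previously threw the NPE described in HIVE-12684.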

http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/test/results/clientpositive/decimal_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
new file mode 100644
index 0000000..dabf7f8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
@@ -0,0 +1,106 @@
+PREHOOK: query: drop table if exists decimal_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists decimal_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_1
+PREHOOK: query: desc decimal_1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc decimal_1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+t                   	decimal(4,2)        	                    
+u                   	decimal(5,0)        	                    
+v                   	decimal(10,0)       	                    
+PREHOOK: query: insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@decimal_1
+POSTHOOK: Lineage: decimal_1.t EXPRESSION []
+POSTHOOK: Lineage: decimal_1.u EXPRESSION []
+POSTHOOK: Lineage: decimal_1.v EXPRESSION []
+PREHOOK: query: analyze table decimal_1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table decimal_1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted decimal_1 v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc formatted decimal_1 v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+# col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
+	 	 	 	 	 	 	 	 	 	 
+v                   	decimal(10,0)       	                    	                    	500                 	1                   	                    	                    	                    	                    	from deserializer   
+PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: decimal_1
+            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: 1 (type: int)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table decimal_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_1
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: drop table decimal_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_1
+POSTHOOK: Output: default@decimal_1

