hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject svn commit: r1658038 [1/8] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clie...
Date Sat, 07 Feb 2015 09:22:47 GMT
Author: prasanthj
Date: Sat Feb  7 09:22:46 2015
New Revision: 1658038

URL: http://svn.apache.org/r1658038
Log:
HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result
in value '0' after running 'analyze table TABLE_NAME compute statistics;' (Prasanth Jayachandran
reviewed by Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
    hive/trunk/ql/src/test/queries/clientpositive/orc_analyze.q
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
    hive/trunk/ql/src/test/results/clientpositive/limit_pushdown.q.out
    hive/trunk/ql/src/test/results/clientpositive/orc_analyze.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_13.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_14.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_15.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_16.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_7.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_8.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_9.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_div0.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_pushdown.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_char_simple.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_coalesce.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_decimal_cast.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_elt.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_if_expr.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_left_outer_join.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_varchar_simple.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_0.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_13.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_14.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_15.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_16.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_7.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_8.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_9.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_div0.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_limit.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_pushdown.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_short_regress.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_casts.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_math_funcs.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_string_funcs.q.out
    hive/trunk/ql/src/test/results/clientpositive/windowing_streaming.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java Sat Feb  7 09:22:46 2015
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.Serializable;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
@@ -223,7 +224,12 @@ public class StatsNoJobTask extends Task
     int ret = 0;
 
     try {
-      List<Partition> partitions = getPartitionsList();
+      Collection<Partition> partitions = null;
+      if (work.getPrunedPartitionList() == null) {
+        partitions = getPartitionsList();
+      } else {
+        partitions = work.getPrunedPartitionList().getPartitions();
+      }
 
       // non-partitioned table
       if (partitions == null) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java Sat Feb  7 09:22:46 2015
@@ -90,16 +90,25 @@ public class GenMRTableScan1 implements
 
         QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
         if (parseInfo.isAnalyzeCommand()) {
-          boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
-          boolean noScan = parseInfo.isNoScanAnalyzeCommand();
-          if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
-
+          if (inputFormat.equals(OrcInputFormat.class)) {
+            // For ORC, all the following statements are the same
+            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
             // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
             // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+
             // There will not be any MR or Tez job above this task
             StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec());
             snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(
                 HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+            // If partition is specified, get pruned partition list
+            Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+            if (confirmedParts.size() > 0) {
+              Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
+              List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
+              PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts,
+                  partCols, false);
+              snjWork.setPrunedPartitionList(partList);
+            }
             Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
             ctx.setCurrTask(snjTask);
             ctx.setCurrTopOp(null);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java Sat Feb  7 09:22:46 2015
@@ -95,16 +95,25 @@ public class ProcessAnalyzeTable impleme
       assert alias != null;
 
       TezWork tezWork = context.currentTask.getWork();
-      boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
-      boolean noScan = parseInfo.isNoScanAnalyzeCommand();
-      if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
-
+      if (inputFormat.equals(OrcInputFormat.class)) {
+        // For ORC, all the following statements are the same
+        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+
         // There will not be any Tez job above this task
         StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec());
         snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
             HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+        // If partition is specified, get pruned partition list
+        Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+        if (confirmedParts.size() > 0) {
+          Table source = parseContext.getQB().getMetaData().getTableForAlias(alias);
+          List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
+          PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts,
+              partCols, false);
+          snjWork.setPrunedPartitionList(partList);
+        }
         Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
         snjTask.setParentTasks(null);
         context.rootTasks.remove(context.currentTask);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java Sat Feb  7 09:22:46 2015
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
 import java.io.Serializable;
 
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 
 /**
  * Client-side stats aggregator task.
@@ -31,6 +32,7 @@ public class StatsNoJobWork implements S
 
   private tableSpec tableSpecs;
   private boolean statsReliable;
+  private PrunedPartitionList prunedPartitionList;
 
   public StatsNoJobWork() {
   }
@@ -54,4 +56,12 @@ public class StatsNoJobWork implements S
   public void setStatsReliable(boolean statsReliable) {
     this.statsReliable = statsReliable;
   }
+
+  public void setPrunedPartitionList(PrunedPartitionList prunedPartitionList) {
+    this.prunedPartitionList = prunedPartitionList;
+  }
+
+  public PrunedPartitionList getPrunedPartitionList() {
+    return prunedPartitionList;
+  }
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/orc_analyze.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_analyze.q?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/orc_analyze.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/orc_analyze.q Sat Feb  7 09:22:46 2015
@@ -30,8 +30,13 @@ STORED AS orc;
 INSERT OVERWRITE TABLE orc_create_people SELECT * FROM orc_create_people_staging ORDER BY id;
 
 set hive.stats.autogather = true;
+analyze table orc_create_people compute statistics;
+desc formatted orc_create_people;
+
 analyze table orc_create_people compute statistics partialscan;
+desc formatted orc_create_people;
 
+analyze table orc_create_people compute statistics noscan;
 desc formatted orc_create_people;
 
 drop table orc_create_people;
@@ -70,8 +75,15 @@ INSERT OVERWRITE TABLE orc_create_people
   SELECT * FROM orc_create_people_staging ORDER BY id;
 
 set hive.stats.autogather = true;
+analyze table orc_create_people partition(state) compute statistics;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
+
 analyze table orc_create_people partition(state) compute statistics partialscan;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
 
+analyze table orc_create_people partition(state) compute statistics noscan;
 desc formatted orc_create_people partition(state="Ca");
 desc formatted orc_create_people partition(state="Or");
 
@@ -116,8 +128,15 @@ INSERT OVERWRITE TABLE orc_create_people
   SELECT * FROM orc_create_people_staging ORDER BY id;
 
 set hive.stats.autogather = true;
+analyze table orc_create_people partition(state) compute statistics;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
+
 analyze table orc_create_people partition(state) compute statistics partialscan;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
 
+analyze table orc_create_people partition(state) compute statistics noscan;
 desc formatted orc_create_people partition(state="Ca");
 desc formatted orc_create_people partition(state="Or");
 
@@ -174,8 +193,15 @@ ALTER TABLE orc_create_people SET SERDE
 ALTER TABLE orc_create_people SET FILEFORMAT ORC;
 
 set hive.stats.autogather = true;
-analyze table orc_create_people partition(state) compute statistics noscan;
+analyze table orc_create_people partition(state) compute statistics;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="OH");
 
+analyze table orc_create_people partition(state) compute statistics partialscan;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="OH");
+
+analyze table orc_create_people partition(state) compute statistics noscan;
 desc formatted orc_create_people partition(state="Ca");
 desc formatted orc_create_people partition(state="OH");
 

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out Sat Feb  7 09:22:46 2015
@@ -109,14 +109,12 @@ PREHOOK: query: -- partition level analy
 analyze table loc_orc partition(year='2001') compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@loc_orc
-PREHOOK: Input: default@loc_orc@year=2001
 PREHOOK: Output: default@loc_orc
 PREHOOK: Output: default@loc_orc@year=2001
 POSTHOOK: query: -- partition level analyze statistics for specific parition
 analyze table loc_orc partition(year='2001') compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@loc_orc
-POSTHOOK: Input: default@loc_orc@year=2001
 POSTHOOK: Output: default@loc_orc
 POSTHOOK: Output: default@loc_orc@year=2001
 PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE
@@ -158,11 +156,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 10 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -181,19 +179,17 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 7 Data size: 425 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), '2001' (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 7 Data size: 425 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE
             ListSink
 
 PREHOOK: query: -- partition level analyze statistics for all partitions
 analyze table loc_orc partition(year) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@loc_orc
-PREHOOK: Input: default@loc_orc@year=2001
-PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
 PREHOOK: Output: default@loc_orc
 PREHOOK: Output: default@loc_orc@year=2001
 PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
@@ -201,8 +197,6 @@ POSTHOOK: query: -- partition level anal
 analyze table loc_orc partition(year) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@loc_orc
-POSTHOOK: Input: default@loc_orc@year=2001
-POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
 POSTHOOK: Output: default@loc_orc
 POSTHOOK: Output: default@loc_orc@year=2001
 POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
@@ -222,11 +216,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
             ListSink
 
 PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -245,7 +239,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -268,7 +262,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -331,11 +325,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: zip (type: bigint)
             outputColumnNames: _col0
-            Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE
             ListSink
 
 PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
@@ -354,7 +348,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string)
             outputColumnNames: _col0
@@ -377,7 +371,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: year (type: string)
             outputColumnNames: _col0
@@ -402,7 +396,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
@@ -425,7 +419,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 7 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
@@ -448,11 +442,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
             ListSink
 
 PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
@@ -471,7 +465,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -496,7 +490,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 7 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
               predicate: (locid > 0) (type: boolean)
               Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -532,7 +526,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 7 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
               predicate: (locid > 0) (type: boolean)
               Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -568,7 +562,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 7 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
               predicate: (locid > 0) (type: boolean)
               Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out Sat Feb  7 09:22:46 2015
@@ -122,11 +122,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
             ListSink
 
 PREHOOK: query: -- column level partial statistics
@@ -155,7 +155,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
@@ -180,7 +180,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: deptid (type: int)
             outputColumnNames: _col0
@@ -213,7 +213,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
@@ -236,7 +236,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: lastname (type: string)
             outputColumnNames: _col0
@@ -259,7 +259,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: deptid (type: int)
             outputColumnNames: _col0
@@ -282,7 +282,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1

Modified: hive/trunk/ql/src/test/results/clientpositive/limit_pushdown.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/limit_pushdown.q.out?rev=1658038&r1=1658037&r2=1658038&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/limit_pushdown.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/limit_pushdown.q.out Sat Feb  7 09:22:46 2015
@@ -352,34 +352,34 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: alltypesorc
-            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: cdouble (type: double)
               outputColumnNames: _col0
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 keys: _col0 (type: double)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: double)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: double)
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.3
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: double)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
-            Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -435,22 +435,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: alltypesorc
-            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats:
NONE
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats:
NONE
             Select Operator
               expressions: ctinyint (type: tinyint), cdouble (type: double)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column
stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
               Group By Operator
                 aggregations: count(DISTINCT _col1)
                 keys: _col0 (type: tinyint), _col1 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column
stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint), _col1 (type: double)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column
stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
                   TopN Hash Memory Usage: 0.3
       Reduce Operator Tree:
         Group By Operator
@@ -458,13 +458,13 @@ STAGE PLANS:
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats:
NONE
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats:
NONE
           Limit
             Number of rows: 20
-            Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats:
NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats:
NONE
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats:
NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -522,22 +522,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: alltypesorc
-            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats:
NONE
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats:
NONE
             Select Operator
               expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type:
string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column
stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
               Group By Operator
                 aggregations: count(DISTINCT _col1), count(DISTINCT _col2)
                 keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column
stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type:
string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column
stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column
stats: NONE
                   TopN Hash Memory Usage: 0.3
       Reduce Operator Tree:
         Group By Operator
@@ -545,13 +545,13 @@ STAGE PLANS:
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats:
NONE
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats:
NONE
           Limit
             Number of rows: 20
-            Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats:
NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats:
NONE
+              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats:
NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat



Mime
View raw message