hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ecapri...@apache.org
Subject svn commit: r1500071 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ test/queries/clientpositive/ test/results/clientpositive/
Date Fri, 05 Jul 2013 17:10:36 GMT
Author: ecapriolo
Date: Fri Jul  5 17:10:35 2013
New Revision: 1500071

URL: http://svn.apache.org/r1500071
Log:
HIVE-4804 parallel order by fails for small datasets (Navis via egc)

Submitted by:	Navis
Reviewed by:	Edward Capriolo

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java
    hive/trunk/ql/src/test/queries/clientpositive/parallel_orderby.q
    hive/trunk/ql/src/test/results/clientpositive/parallel_orderby.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=1500071&r1=1500070&r2=1500071&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Fri Jul  5 17:10:35 2013
@@ -419,13 +419,14 @@ public class ExecDriver extends Task<Map
 
       Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI());
 
-      if (work.getSamplingType() > 0) {
+      if (work.getSamplingType() > 0 && work.getNumReduceTasks() > 1) {
         try {
           handleSampling(driverContext, work, job, new HiveConf(conf));
           job.setPartitionerClass(HiveTotalOrderPartitioner.class);
         } catch (Exception e) {
-          LOG.info("Failed to use sampling", e);
-          work.setNumReduceTasks(1);  // rollback
+          console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
+          work.setNumReduceTasks(1);
+          job.setNumReduceTasks(1);
         }
       }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java?rev=1500071&r1=1500070&r2=1500071&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java Fri Jul  5 17:10:35 2013
@@ -89,6 +89,9 @@ public class PartitionKeySampler impleme
       while (last >= k && C.compare(sorted[last], sorted[k]) == 0) {
         k++;
       }
+      if (k >= sorted.length) {
+        throw new IllegalStateException("not enough number of sample");
+      }
       partitionKeys[i - 1] = sorted[k];
       last = k;
     }

Modified: hive/trunk/ql/src/test/queries/clientpositive/parallel_orderby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parallel_orderby.q?rev=1500071&r1=1500070&r2=1500071&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parallel_orderby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parallel_orderby.q Fri Jul  5 17:10:35 2013
@@ -4,7 +4,7 @@ load data local inpath '../data/files/kv
 
 set mapred.reduce.tasks = 4;
 set hive.optimize.sampling.orderby=true;
-set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.optimize.sampling.orderby.percent=0.66f;
 
 explain
 create table total_ordered as select * from src5 order by key, value;
@@ -12,3 +12,12 @@ create table total_ordered as select * f
 
 desc formatted total_ordered;
 select * from total_ordered;
+
+set hive.optimize.sampling.orderby.percent=0.0001f;
+-- rolling back to single task in case that the number of sample is not enough
+
+drop table total_ordered;
+create table total_ordered as select * from src5 order by key, value;
+
+desc formatted total_ordered;
+select * from total_ordered;

Modified: hive/trunk/ql/src/test/results/clientpositive/parallel_orderby.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parallel_orderby.q.out?rev=1500071&r1=1500070&r2=1500071&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parallel_orderby.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parallel_orderby.q.out Fri Jul  5 17:10:35 2013
@@ -186,3 +186,112 @@ POSTHOOK: Input: default@total_ordered
 86	val_86
 98	val_98
 98	val_98
+PREHOOK: query: -- rolling back to single task in case that the number of sample is not enough
+
+drop table total_ordered
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@total_ordered
+PREHOOK: Output: default@total_ordered
+POSTHOOK: query: -- rolling back to single task in case that the number of sample is not enough
+
+drop table total_ordered
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@total_ordered
+POSTHOOK: Output: default@total_ordered
+PREHOOK: query: create table total_ordered as select * from src5 order by key, value
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src5
+POSTHOOK: query: create table total_ordered as select * from src5 order by key, value
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src5
+POSTHOOK: Output: default@total_ordered
+PREHOOK: query: desc formatted total_ordered
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted total_ordered
+POSTHOOK: type: DESCTABLE
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	numFiles            	1                   
+	numPartitions       	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	totalSize           	560                 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from total_ordered
+PREHOOK: type: QUERY
+PREHOOK: Input: default@total_ordered
+#### A masked pattern was here ####
+POSTHOOK: query: select * from total_ordered
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@total_ordered
+#### A masked pattern was here ####
+128	val_128
+128	val_128
+150	val_150
+150	val_150
+165	val_165
+165	val_165
+193	val_193
+193	val_193
+213	val_213
+213	val_213
+213	val_213
+213	val_213
+213	val_214
+213	val_214
+224	val_224
+224	val_224
+238	val_238
+238	val_238
+238	val_239
+238	val_239
+238	val_240
+238	val_240
+255	val_255
+255	val_255
+265	val_265
+265	val_265
+27	val_27
+27	val_27
+273	val_273
+273	val_273
+278	val_278
+278	val_278
+311	val_311
+311	val_311
+369	val_369
+369	val_369
+401	val_401
+401	val_401
+409	val_409
+409	val_409
+484	val_484
+484	val_484
+66	val_66
+66	val_66
+86	val_86
+86	val_86
+98	val_98
+98	val_98



Mime
View raw message