hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1425589 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clientpositive/
Date Mon, 24 Dec 2012 05:35:06 GMT
Author: namit
Date: Mon Dec 24 05:35:06 2012
New Revision: 1425589

URL: http://svn.apache.org/viewvc?rev=1425589&view=rev
Log:
HIVE-3832 Insert overwrite doesn't create a dir if the skewed column position doesnt match
(Gang Tim Liu via namit)


Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java
    hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q
    hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q
    hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
    hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1425589&r1=1425588&r2=1425589&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Mon Dec 24 05:35:06 2012
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
 import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
@@ -719,8 +720,9 @@ public class FileSinkOperator extends Te
       "The row has less number of columns than no. of skewed column.";
 
     skewedValsCandidate = new ArrayList<String>(skewedCols.size());
-    for (int index : lbCtx.getRowSkewedIndex()) {
-      skewedValsCandidate.add(index, standObjs.get(index).toString());
+    for (SkewedColumnPositionPair posPair : lbCtx.getRowSkewedIndex()) {
+      skewedValsCandidate.add(posPair.getSkewColPosition(),
+          standObjs.get(posPair.getTblColPosition()).toString());
     }
     /* The row matches skewed column names. */
     if (allSkewedVals.contains(skewedValsCandidate)) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java?rev=1425589&r1=1425588&r2=1425589&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java Mon Dec 24 05:35:06 2012
@@ -40,14 +40,14 @@ public class ListBucketingCtx implements
   private List<String> skewedColNames;
   private List<List<String>> skewedColValues;
   private Map<List<String>, String> lbLocationMap;
-  private List<Integer> rowSkewedIndex;
+  private List<SkewedColumnPositionPair> rowSkewedIndex;
   private boolean isStoredAsSubDirectories;
   private String defaultKey;
   private String defaultDirName;
   private List<String> skewedValuesDirNames;
 
   public ListBucketingCtx() {
-    rowSkewedIndex = new ArrayList<Integer>();
+    rowSkewedIndex = new ArrayList<SkewedColumnPositionPair>();
     skewedValuesDirNames = new ArrayList<String>();
   }
 
@@ -109,7 +109,8 @@ public class ListBucketingCtx implements
         int index = this.skewedColNames.indexOf(cols.get(i).getInternalName());
         if (index > -1) {
           hitNo++;
-          rowSkewedIndex.add(index);
+          SkewedColumnPositionPair pair = new SkewedColumnPositionPair(i, index);
+          rowSkewedIndex.add(pair);
         }
       }
       assert (hitNo == this.skewedColNames.size()) : "RowSchema doesn't have all skewed columns."
@@ -139,20 +140,6 @@ public class ListBucketingCtx implements
   }
 
   /**
-   * @return the rowSkewedIndex
-   */
-  public List<Integer> getRowSkewedIndex() {
-    return rowSkewedIndex;
-  }
-
-  /**
-   * @param rowSkewedIndex the rowSkewedIndex to set
-   */
-  public void setRowSkewedIndex(List<Integer> rowSkewedIndex) {
-    this.rowSkewedIndex = rowSkewedIndex;
-  }
-
-  /**
    * @return the isStoredAsSubDirectories
    */
   public boolean isStoredAsSubDirectories() {
@@ -235,4 +222,20 @@ public class ListBucketingCtx implements
   public void setSkewedValuesDirNames(List<String> skewedValuesDirNames) {
     this.skewedValuesDirNames = skewedValuesDirNames;
   }
+
+  /**
+   * @return the rowSkewedIndex
+   */
+  public List<SkewedColumnPositionPair> getRowSkewedIndex() {
+    return rowSkewedIndex;
+  }
+
+  /**
+   * @param rowSkewedIndex the rowSkewedIndex to set
+   */
+  public void setRowSkewedIndex(List<SkewedColumnPositionPair> rowSkewedIndex) {
+    this.rowSkewedIndex = rowSkewedIndex;
+  }
 }
+
+

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java Mon Dec 24 05:35:06 2012
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+/**
+ * This class record 2 types of positions for a skewed column:
+ * 1. position in table column list
+ * 2. position in skewed column list
+ * Position starts from 0.
+ * For example, create a table with
+ * create table list_bucketing_static_part (key String, value String)
+ * partitioned by (ds String, hr String)
+ * skewed by (value) on ('val_466','val_287','val_82')
+ * stored as DIRECTORIES
+ * STORED AS RCFILE;
+ *
+ * Skewed column is "value".
+ * 1. It's position in table column is 1.
+ * 2. It's position in skewed column list is 0.
+ *
+ * This information will be used in {@FileSinkOperator} generateListBucketingDirName
+ */
+public class SkewedColumnPositionPair {
+  private int tblColPosition;
+  private int skewColPosition;
+
+  public SkewedColumnPositionPair () {}
+
+  public SkewedColumnPositionPair (int tblColPosition, int skewColPosition) {
+    this.tblColPosition = tblColPosition;
+    this.skewColPosition = skewColPosition;
+  }
+
+  /**
+   * @return the tblColPosition
+   */
+  public int getTblColPosition() {
+    return tblColPosition;
+  }
+
+  /**
+   * @param tblColPosition the tblColPosition to set
+   */
+  public void setTblColPosition(int tblColPosition) {
+    this.tblColPosition = tblColPosition;
+  }
+
+  /**
+   * @return the skewColPosition
+   */
+  public int getSkewColPosition() {
+    return skewColPosition;
+  }
+
+  /**
+   * @param skewColPosition the skewColPosition to set
+   */
+  public void setSkewColPosition(int skewColPosition) {
+    this.skewColPosition = skewColPosition;
+  }
+
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q Mon Dec 24 05:35:06 2012
@@ -0,0 +1,36 @@
+set hive.mapred.supports.subdirectories=true;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+
+-- Ensure it works if skewed column is not the first column in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. multiple skewed columns.
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (value) on ('val_466','val_287','val_82')
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from src;
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src;
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');	
+
+set hive.optimize.listbucketing=true;
+explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466";
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466";
+
+drop table list_bucketing_static_part;

Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q Mon Dec 24 05:35:06 2012
@@ -0,0 +1,42 @@
+set hive.mapred.supports.subdirectories=true;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;	
+set hive.merge.mapredfiles=false;
+
+-- Ensure it works if skewed column is not the first column in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) 
+    partitioned by (ds String, hr String) 
+    skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
+    stored as DIRECTORIES
+    STORED AS RCFILE;
+
+-- list bucketing DML 
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08',  hr = '11')
+select 1, key, 1, value, 1 from src;
+
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src;
+
+-- check DML result
+show partitions list_bucketing_mul_col;
+desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11');	
+
+set hive.optimize.listbucketing=true;
+explain extended
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466";
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466";
+
+explain extended
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382";
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382";
+
+drop table list_bucketing_mul_col;

Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out Mon Dec 24 05:35:06 2012
@@ -0,0 +1,367 @@
+PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. multiple skewed columns.
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (value) on ('val_466','val_287','val_82')
+    stored as DIRECTORIES
+    STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. multiple skewed columns.
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+    partitioned by (ds String, hr String) 
+    skewed by (value) on ('val_466','val_287','val_82')
+    stored as DIRECTORIES
+    STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucketing_static_part
+PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08',  hr = '11')
+select key, value from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME list_bucketing_static_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Static Partition Specification: ds=2008-04-08/hr=11/
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    properties:
+                      bucket_count -1
+                      columns key,value
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.list_bucketing_static_part
+                      partition_columns ds/hr
+                      serialization.ddl struct list_bucketing_static_part { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.list_bucketing_static_part
+                TotalFiles: 1
+                GatherStats: true
+                MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numPartitions 0
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numPartitions 0
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [src]
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2008-04-08
+            hr 11
+          replace: true
+#### A masked pattern was here ####
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.list_bucketing_static_part
+                partition_columns ds/hr
+                serialization.ddl struct list_bucketing_static_part { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.list_bucketing_static_part
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+
+PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- check DML result
+show partitions list_bucketing_static_part
+PREHOOK: type: SHOWPARTITIONS
+POSTHOOK: query: -- check DML result
+show partitions list_bucketing_static_part
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ds=2008-04-08/hr=11
+PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+hr                  	string              	None                
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[2008-04-08, 11]    	 
+Database:           	default             	 
+Table:              	list_bucketing_static_part	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+	numFiles            	4                   
+	numRows             	500                 
+	rawDataSize         	4812                
+	totalSize           	5522                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Stored As SubDirectories:	Yes                 	 
+Skewed Columns:     	[value]             	 
+Skewed Values:      	[[val_466], [val_287], [val_82]]	 
+#### A masked pattern was here ####
+Skewed Value to Truncated Path:	{[val_82]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_82, [val_287]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_287, [val_466]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466}	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME list_bucketing_static_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')) (= (TOK_TABLE_OR_COL value) "val_466")))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        list_bucketing_static_part 
+          TableScan
+            alias: list_bucketing_static_part
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 'val_466')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types string:string
+                        escape.delim \
+                        serialization.format 1
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: value=val_466
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.list_bucketing_static_part
+              numFiles 4
+              numPartitions 1
+              numRows 500
+              partition_columns ds/hr
+              rawDataSize 4812
+              serialization.ddl struct list_bucketing_static_part { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              totalSize 5522
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+          
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.list_bucketing_static_part
+                numFiles 4
+                numPartitions 1
+                numRows 500
+                partition_columns ds/hr
+                rawDataSize 4812
+                serialization.ddl struct list_bucketing_static_part { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                totalSize 5522
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.list_bucketing_static_part
+            name: default.list_bucketing_static_part
+      Truncated Path -> Alias:
+        /list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466 [list_bucketing_static_part]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+466	val_466
+466	val_466
+466	val_466
+PREHOOK: query: drop table list_bucketing_static_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucketing_static_part
+PREHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: query: drop table list_bucketing_static_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucketing_static_part
+POSTHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]

Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out Mon Dec 24 05:35:06 2012
@@ -0,0 +1,560 @@
+PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) 
+    partitioned by (ds String, hr String) 
+    skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
+    stored as DIRECTORIES
+    STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) 
+    partitioned by (ds String, hr String) 
+    skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
+    stored as DIRECTORIES
+    STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucketing_mul_col
+PREHOOK: query: -- list bucketing DML 
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08',  hr = '11')
+select 1, key, 1, value, 1 from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML 
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08',  hr = '11')
+select 1, key, 1, value, 1 from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME list_bucketing_mul_col) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR 1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: 1
+                    type: int
+                    expr: key
+                    type: string
+                    expr: 1
+                    type: int
+                    expr: value
+                    type: string
+                    expr: 1
+                    type: int
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4
+              Select Operator
+                expressions:
+                      expr: UDFToString(_col0)
+                      type: string
+                      expr: _col1
+                      type: string
+                      expr: UDFToString(_col2)
+                      type: string
+                      expr: _col3
+                      type: string
+                      expr: UDFToString(_col4)
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Static Partition Specification: ds=2008-04-08/hr=11/
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      properties:
+                        bucket_count -1
+                        columns col1,col2,col3,col4,col5
+                        columns.types string:string:string:string:string
+#### A masked pattern was here ####
+                        name default.list_bucketing_mul_col
+                        partition_columns ds/hr
+                        serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.list_bucketing_mul_col
+                  TotalFiles: 1
+                  GatherStats: true
+                  MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numPartitions 0
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numPartitions 0
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [src]
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2008-04-08
+            hr 11
+          replace: true
+#### A masked pattern was here ####
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              properties:
+                bucket_count -1
+                columns col1,col2,col3,col4,col5
+                columns.types string:string:string:string:string
+#### A masked pattern was here ####
+                name default.list_bucketing_mul_col
+                partition_columns ds/hr
+                serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.list_bucketing_mul_col
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+
+PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+PREHOOK: query: -- check DML result
+show partitions list_bucketing_mul_col
+PREHOOK: type: SHOWPARTITIONS
+POSTHOOK: query: -- check DML result
+show partitions list_bucketing_mul_col
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+ds=2008-04-08/hr=11
+PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+# col_name            	data_type           	comment             
+	 	 
+col1                	string              	None                
+col2                	string              	None                
+col3                	string              	None                
+col4                	string              	None                
+col5                	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+hr                  	string              	None                
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[2008-04-08, 11]    	 
+Database:           	default             	 
+Table:              	list_bucketing_mul_col	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+	numFiles            	4                   
+	numRows             	500                 
+	rawDataSize         	6312                
+	totalSize           	7094                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Stored As SubDirectories:	Yes                 	 
+Skewed Columns:     	[col2, col4]        	 
+Skewed Values:      	[[466, val_466], [287, val_287], [82, val_82]]	 
+#### A masked pattern was here ####
+Skewed Value to Truncated Path:	{[82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82, [466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287}	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: explain extended
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME list_bucketing_mul_col))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')) (= (TOK_TABLE_OR_COL col2) "466")) (= (TOK_TABLE_OR_COL col4) "val_466")))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        list_bucketing_mul_col 
+          TableScan
+            alias: list_bucketing_mul_col
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: ((col2 = '466') and (col4 = 'val_466'))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: col1
+                      type: string
+                      expr: col2
+                      type: string
+                      expr: col3
+                      type: string
+                      expr: col4
+                      type: string
+                      expr: col5
+                      type: string
+                      expr: ds
+                      type: string
+                      expr: hr
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                        columns.types string:string:string:string:string:string:string
+                        escape.delim \
+                        serialization.format 1
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: col4=val_466
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              bucket_count -1
+              columns col1,col2,col3,col4,col5
+              columns.types string:string:string:string:string
+#### A masked pattern was here ####
+              name default.list_bucketing_mul_col
+              numFiles 4
+              numPartitions 1
+              numRows 500
+              partition_columns ds/hr
+              rawDataSize 6312
+              serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              totalSize 7094
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+          
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              properties:
+                bucket_count -1
+                columns col1,col2,col3,col4,col5
+                columns.types string:string:string:string:string
+#### A masked pattern was here ####
+                name default.list_bucketing_mul_col
+                numFiles 4
+                numPartitions 1
+                numRows 500
+                partition_columns ds/hr
+                rawDataSize 6312
+                serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                totalSize 7094
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.list_bucketing_mul_col
+            name: default.list_bucketing_mul_col
+      Truncated Path -> Alias:
+        /list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+1	466	1	val_466	1	2008-04-08	11
+1	466	1	val_466	1	2008-04-08	11
+1	466	1	val_466	1	2008-04-08	11
+PREHOOK: query: explain extended
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME list_bucketing_mul_col))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')) (= (TOK_TABLE_OR_COL col2) "382")) (= (TOK_TABLE_OR_COL col4) "val_382")))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        list_bucketing_mul_col 
+          TableScan
+            alias: list_bucketing_mul_col
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: ((col2 = '382') and (col4 = 'val_382'))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: col1
+                      type: string
+                      expr: col2
+                      type: string
+                      expr: col3
+                      type: string
+                      expr: col4
+                      type: string
+                      expr: col5
+                      type: string
+                      expr: ds
+                      type: string
+                      expr: hr
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                        columns.types string:string:string:string:string:string:string
+                        escape.delim \
+                        serialization.format 1
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              bucket_count -1
+              columns col1,col2,col3,col4,col5
+              columns.types string:string:string:string:string
+#### A masked pattern was here ####
+              name default.list_bucketing_mul_col
+              numFiles 4
+              numPartitions 1
+              numRows 500
+              partition_columns ds/hr
+              rawDataSize 6312
+              serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              totalSize 7094
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+          
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              properties:
+                bucket_count -1
+                columns col1,col2,col3,col4,col5
+                columns.types string:string:string:string:string
+#### A masked pattern was here ####
+                name default.list_bucketing_mul_col
+                numFiles 4
+                numPartitions 1
+                numRows 500
+                partition_columns ds/hr
+                rawDataSize 6312
+                serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                totalSize 7094
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.list_bucketing_mul_col
+            name: default.list_bucketing_mul_col
+      Truncated Path -> Alias:
+        /list_bucketing_mul_col/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from list_bucketing_mul_col 
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+1	382	1	val_382	1	2008-04-08	11
+1	382	1	val_382	1	2008-04-08	11
+PREHOOK: query: drop table list_bucketing_mul_col
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucketing_mul_col
+PREHOOK: Output: default@list_bucketing_mul_col
+POSTHOOK: query: drop table list_bucketing_mul_col
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucketing_mul_col
+POSTHOOK: Output: default@list_bucketing_mul_col
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []



Mime
View raw message