hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ct...@apache.org
Subject hive git commit: HIVE-12245: Support column comments for an HBase backed table (Chaoyu Tang, reviewed by Jimmy Xiang)
Date Thu, 29 Oct 2015 02:47:08 GMT
Repository: hive
Updated Branches:
  refs/heads/master 53fc31931 -> 99a043a05


HIVE-12245: Support column comments for an HBase backed table (Chaoyu Tang, reviewed by Jimmy Xiang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99a043a0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99a043a0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99a043a0

Branch: refs/heads/master
Commit: 99a043a05b4d823589e403de9779cf3a4b881ca3
Parents: 53fc319
Author: ctang <ctang.ma@gmail.com>
Authored: Wed Oct 28 22:46:55 2015 -0400
Committer: ctang <ctang.ma@gmail.com>
Committed: Wed Oct 28 22:46:55 2015 -0400

----------------------------------------------------------------------
 .../hive/hbase/HBaseLazyObjectFactory.java      | 28 +++++++++++++++
 .../apache/hadoop/hive/hbase/HBaseSerDe.java    |  5 +--
 .../src/test/queries/positive/hbase_queries.q   |  4 ++-
 .../results/positive/external_table_ppd.q.out   | 16 ++++-----
 .../positive/hbase_binary_storage_queries.q.out | 32 ++++++++---------
 .../test/results/positive/hbase_queries.q.out   | 37 +++++++++++++++-----
 .../test/results/positive/hbase_timestamp.q.out |  6 ++--
 .../positive/hbase_timestamp_format.q.out       | 12 +++----
 8 files changed, 93 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
index cb9f9d3..841e8ba 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
@@ -19,7 +19,10 @@
 package org.apache.hadoop.hive.hbase;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
+import java.util.Properties;
 
 import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -53,4 +56,29 @@ public class HBaseLazyObjectFactory {
         serdeParams.getColumnNames(), columnObjectInspectors, null, serdeParams.getSeparators()[0],
         serdeParams, ObjectInspectorOptions.JAVA);
   }
+
+  public static ObjectInspector createLazyHBaseStructInspector(HBaseSerDeParameters hSerdeParams,
+      Properties tbl)
+      throws SerDeException {
+    List<TypeInfo> columnTypes = hSerdeParams.getColumnTypes();
+    ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>(
+        columnTypes.size());
+    for (int i = 0; i < columnTypes.size(); i++) {
+      if (i == hSerdeParams.getKeyIndex()) {
+        columnObjectInspectors.add(hSerdeParams.getKeyFactory()
+            .createKeyObjectInspector(columnTypes.get(i)));
+      } else {
+        columnObjectInspectors.add(hSerdeParams.getValueFactories().get(i)
+            .createValueObjectInspector(columnTypes.get(i)));
+      }
+    }
+    List<String> structFieldComments = tbl.getProperty("columns.comments") == null ?
+        new ArrayList<String>(Collections.nCopies(columnTypes.size(), ""))
+        : Arrays.asList(tbl.getProperty("columns.comments").split("\0", columnTypes.size()));
+
+    return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
+        hSerdeParams.getColumnNames(), columnObjectInspectors, structFieldComments,
+        hSerdeParams.getSerdeParams().getSeparators()[0],
+        hSerdeParams.getSerdeParams(), ObjectInspectorOptions.JAVA);
+  }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
----------------------------------------------------------------------
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
index 41d6302..466aabe 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
@@ -125,10 +125,7 @@ public class HBaseSerDe extends AbstractSerDe {
     serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName());
 
     cachedObjectInspector =
-        HBaseLazyObjectFactory
-            .createLazyHBaseStructInspector(serdeParams.getSerdeParams(),
-                serdeParams.getKeyIndex(), serdeParams.getKeyFactory(),
-                serdeParams.getValueFactories());
+        HBaseLazyObjectFactory.createLazyHBaseStructInspector(serdeParams, tbl);
 
     cachedHBaseRow = new LazyHBaseRow(
         (LazySimpleStructObjectInspector) cachedObjectInspector, serdeParams);

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/queries/positive/hbase_queries.q
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/queries/positive/hbase_queries.q b/hbase-handler/src/test/queries/positive/hbase_queries.q
index 6ef9325..b445c4b 100644
--- a/hbase-handler/src/test/queries/positive/hbase_queries.q
+++ b/hbase-handler/src/test/queries/positive/hbase_queries.q
@@ -1,5 +1,5 @@
 DROP TABLE hbase_table_1;
-CREATE TABLE hbase_table_1(key int, value string) 
+CREATE TABLE hbase_table_1(key int comment 'It is a column key', value string comment 'It is the column string value')
 STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
 WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
 TBLPROPERTIES ("hbase.table.name" = "hbase_table_0");
@@ -162,6 +162,8 @@ DESCRIBE EXTENDED hbase_table_1_like;
 INSERT OVERWRITE TABLE hbase_table_1_like SELECT * FROM hbase_table_1;
 SELECT COUNT(*) FROM hbase_table_1_like;
 
+SHOW CREATE TABLE hbase_table_1_like;
+
 DROP TABLE hbase_table_1;
 DROP TABLE hbase_table_1_like;
 DROP TABLE hbase_table_2;

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/external_table_ppd.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/external_table_ppd.q.out b/hbase-handler/src/test/results/positive/external_table_ppd.q.out
index 83eb2f5..fd4b6ac 100644
--- a/hbase-handler/src/test/results/positive/external_table_ppd.q.out
+++ b/hbase-handler/src/test/results/positive/external_table_ppd.q.out
@@ -40,14 +40,14 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@t_hbase
 # col_name            	data_type           	comment             
 	 	 
-key                 	string              	from deserializer   
-tinyint_col         	tinyint             	from deserializer   
-smallint_col        	smallint            	from deserializer   
-int_col             	int                 	from deserializer   
-bigint_col          	bigint              	from deserializer   
-float_col           	float               	from deserializer   
-double_col          	double              	from deserializer   
-boolean_col         	boolean             	from deserializer   
+key                 	string              	                    
+tinyint_col         	tinyint             	                    
+smallint_col        	smallint            	                    
+int_col             	int                 	                    
+bigint_col          	bigint              	                    
+float_col           	float               	                    
+double_col          	double              	                    
+boolean_col         	boolean             	                    
 	 	 
 # Detailed Table Information	 	 
 Database:           	default             	 

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out
index f212331..24df908 100644
--- a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out
@@ -40,14 +40,14 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@t_hbase
 # col_name            	data_type           	comment             
 	 	 
-key                 	string              	from deserializer   
-tinyint_col         	tinyint             	from deserializer   
-smallint_col        	smallint            	from deserializer   
-int_col             	int                 	from deserializer   
-bigint_col          	bigint              	from deserializer   
-float_col           	float               	from deserializer   
-double_col          	double              	from deserializer   
-boolean_col         	boolean             	from deserializer   
+key                 	string              	                    
+tinyint_col         	tinyint             	                    
+smallint_col        	smallint            	                    
+int_col             	int                 	                    
+bigint_col          	bigint              	                    
+float_col           	float               	                    
+double_col          	double              	                    
+boolean_col         	boolean             	                    
 	 	 
 # Detailed Table Information	 	 
 Database:           	default             	 
@@ -215,14 +215,14 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@t_hbase_1
 # col_name            	data_type           	comment             
 	 	 
-key                 	string              	from deserializer   
-tinyint_col         	tinyint             	from deserializer   
-smallint_col        	smallint            	from deserializer   
-int_col             	int                 	from deserializer   
-bigint_col          	bigint              	from deserializer   
-float_col           	float               	from deserializer   
-double_col          	double              	from deserializer   
-boolean_col         	boolean             	from deserializer   
+key                 	string              	                    
+tinyint_col         	tinyint             	                    
+smallint_col        	smallint            	                    
+int_col             	int                 	                    
+bigint_col          	bigint              	                    
+float_col           	float               	                    
+double_col          	double              	                    
+boolean_col         	boolean             	                    
 	 	 
 # Detailed Table Information	 	 
 Database:           	default             	 

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_queries.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out
index ce6e526..d887566 100644
--- a/hbase-handler/src/test/results/positive/hbase_queries.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out
@@ -2,14 +2,14 @@ PREHOOK: query: DROP TABLE hbase_table_1
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: DROP TABLE hbase_table_1
 POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE hbase_table_1(key int, value string) 
+PREHOOK: query: CREATE TABLE hbase_table_1(key int comment 'It is a column key', value string comment 'It is the column string value') 
 STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
 WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
 TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@hbase_table_1
-POSTHOOK: query: CREATE TABLE hbase_table_1(key int, value string) 
+POSTHOOK: query: CREATE TABLE hbase_table_1(key int comment 'It is a column key', value string comment 'It is the column string value') 
 STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
 WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
 TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")
@@ -22,8 +22,8 @@ PREHOOK: Input: default@hbase_table_1
 POSTHOOK: query: DESCRIBE EXTENDED hbase_table_1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@hbase_table_1
-key                 	int                 	from deserializer   
-value               	string              	from deserializer   
+key                 	int                 	It is a column key  
+value               	string              	It is the column string value
 	 	 
 #### A masked pattern was here ####
 PREHOOK: query: select * from hbase_table_1
@@ -834,9 +834,9 @@ PREHOOK: Input: default@hbase_table_3_like
 POSTHOOK: query: DESCRIBE EXTENDED hbase_table_3_like
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@hbase_table_3_like
-key                 	int                 	from deserializer   
-value               	string              	from deserializer   
-count               	int                 	from deserializer   
+key                 	int                 	                    
+value               	string              	                    
+count               	int                 	                    
 	 	 
 #### A masked pattern was here ####
 PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_3_like SELECT * FROM hbase_table_3
@@ -878,8 +878,8 @@ PREHOOK: Input: default@hbase_table_1_like
 POSTHOOK: query: DESCRIBE EXTENDED hbase_table_1_like
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@hbase_table_1_like
-key                 	int                 	from deserializer   
-value               	string              	from deserializer   
+key                 	int                 	It is a column key  
+value               	string              	It is the column string value
 	 	 
 #### A masked pattern was here ####
 PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_1_like SELECT * FROM hbase_table_1
@@ -899,6 +899,25 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hbase_table_1_like
 #### A masked pattern was here ####
 155
+PREHOOK: query: SHOW CREATE TABLE hbase_table_1_like
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@hbase_table_1_like
+POSTHOOK: query: SHOW CREATE TABLE hbase_table_1_like
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@hbase_table_1_like
+CREATE EXTERNAL TABLE `hbase_table_1_like`(
+  `key` int COMMENT 'It is a column key', 
+  `value` string COMMENT 'It is the column string value')
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.hbase.HBaseSerDe' 
+STORED BY 
+  'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
+WITH SERDEPROPERTIES ( 
+  'hbase.columns.mapping'='cf:string', 
+  'serialization.format'='1')
+TBLPROPERTIES (
+  'hbase.table.name'='hbase_table_0', 
+#### A masked pattern was here ####
 PREHOOK: query: DROP TABLE hbase_table_1
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@hbase_table_1

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_timestamp.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_timestamp.q.out b/hbase-handler/src/test/results/positive/hbase_timestamp.q.out
index 538e551..6c42fc3 100644
--- a/hbase-handler/src/test/results/positive/hbase_timestamp.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_timestamp.q.out
@@ -20,9 +20,9 @@ PREHOOK: Input: default@hbase_table
 POSTHOOK: query: DESC extended hbase_table
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@hbase_table
-key                 	string              	from deserializer   
-value               	string              	from deserializer   
-time                	timestamp           	from deserializer   
+key                 	string              	                    
+value               	string              	                    
+time                	timestamp           	                    
 	 	 
 #### A masked pattern was here ####
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table SELECT key, value, "2012-02-23 10:14:52" WHERE (key % 17) = 0

http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out b/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out
index 138cfe6..0428e41 100644
--- a/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out
@@ -18,9 +18,9 @@ PREHOOK: Input: default@hbase_str
 POSTHOOK: query: describe hbase_str
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@hbase_str
-rowkey              	string              	from deserializer   
-mytime              	string              	from deserializer   
-mystr               	string              	from deserializer   
+rowkey              	string              	                    
+mytime              	string              	                    
+mystr               	string              	                    
 PREHOOK: query: insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -62,9 +62,9 @@ PREHOOK: Input: default@hbase_ts
 POSTHOOK: query: describe hbase_ts
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@hbase_ts
-rowkey              	string              	from deserializer   
-mytime              	timestamp           	from deserializer   
-mystr               	string              	from deserializer   
+rowkey              	string              	                    
+mytime              	timestamp           	                    
+mystr               	string              	                    
 PREHOOK: query: select * from hbase_ts
 PREHOOK: type: QUERY
 PREHOOK: Input: default@hbase_ts


Mime
View raw message