spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marmb...@apache.org
Subject spark git commit: [SPARK-5824] [SQL] add null format in ctas and set default col comment to null
Date Mon, 16 Feb 2015 20:31:39 GMT
Repository: spark
Updated Branches:
  refs/heads/master cc552e042 -> 275a0c081


[SPARK-5824] [SQL] add null format in ctas and set default col comment to null

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #4609 from adrian-wang/ctas and squashes the following commits:

0a75d5a [Daoyuan Wang] reorder import
93d1863 [Daoyuan Wang] add null format in ctas and set default col comment to null


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/275a0c08
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/275a0c08
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/275a0c08

Branch: refs/heads/master
Commit: 275a0c08134dea1896eab73a8e017256900fb1db
Parents: cc552e0
Author: Daoyuan Wang <daoyuan.wang@intel.com>
Authored: Mon Feb 16 12:31:36 2015 -0800
Committer: Michael Armbrust <michael@databricks.com>
Committed: Mon Feb 16 12:31:36 2015 -0800

----------------------------------------------------------------------
 .../hive/execution/HiveCompatibilitySuite.scala   |  1 +
 .../spark/sql/hive/HiveMetastoreCatalog.scala     |  3 ++-
 ...llformatCTAS-0-36f9196395758cebfed837a1c391a1e |  0
 ...lformatCTAS-1-b5a31d4cb34218b8de1ac3fed59fa75b |  0
 ...formatCTAS-10-7f4f04b87c7ef9653b4646949b24cf0b | 10 ++++++++++
 ...formatCTAS-11-4a4c16b53c612d00012d338c97bf5281 |  0
 ...formatCTAS-12-7f4f04b87c7ef9653b4646949b24cf0b | 10 ++++++++++
 ...formatCTAS-13-2e59caa113585495d8684fee69d88bc0 |  0
 ...formatCTAS-14-ad9fe9d68c2cf492259af4f6167c1b12 |  0
 ...lformatCTAS-2-aa2bdbd93668dceae43d1a02f2ede68d |  0
 ...lformatCTAS-3-b0057150f237050f38c1efa1f2d6b273 |  6 ++++++
 ...lformatCTAS-4-16c7086f39d6458b6c5cf2479f0473bd |  0
 ...lformatCTAS-5-183d77b734ce6a373de5b3ebe1cd04c9 |  0
 ...lformatCTAS-6-159fff36b548e00ee952d1df8ef19833 |  0
 ...lformatCTAS-7-46900b082b02ce3e58087d1f41128f65 |  4 ++++
 ...lformatCTAS-8-7f26cbd6be5631a3acce26f667d1c5d8 | 18 ++++++++++++++++++
 ...lformatCTAS-9-22e1b3899de7087b39c24d9d8f643b47 |  1 +
 .../scala/org/apache/spark/sql/hive/Shim12.scala  |  2 ++
 .../scala/org/apache/spark/sql/hive/Shim13.scala  |  7 +++++++
 19 files changed, 61 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index e443e5b..133f2d3 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -640,6 +640,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter
{
     "nonblock_op_deduplicate",
     "notable_alias1",
     "notable_alias2",
+    "nullformatCTAS",
     "nullgroup",
     "nullgroup2",
     "nullgroup3",

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 6d794d0..f82778c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -240,7 +240,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog
with
     val hiveSchema: JList[FieldSchema] = if (schema == null || schema.isEmpty) {
       crtTbl.getCols
     } else {
-      schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), ""))
+      schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), null))
     }
     tbl.setFields(hiveSchema)
 
@@ -314,6 +314,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog
with
     if (crtTbl != null && crtTbl.getLineDelim() != null) {
       tbl.setSerdeParam(serdeConstants.LINE_DELIM, crtTbl.getLineDelim())
     }
+    HiveShim.setTblNullFormat(crtTbl, tbl)
 
     if (crtTbl != null && crtTbl.getSerdeProps() != null) {
       val iter = crtTbl.getSerdeProps().entrySet().iterator()

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-0-36f9196395758cebfed837a1c391a1e
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-0-36f9196395758cebfed837a1c391a1e
b/sql/hive/src/test/resources/golden/nullformatCTAS-0-36f9196395758cebfed837a1c391a1e
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-1-b5a31d4cb34218b8de1ac3fed59fa75b
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-1-b5a31d4cb34218b8de1ac3fed59fa75b
b/sql/hive/src/test/resources/golden/nullformatCTAS-1-b5a31d4cb34218b8de1ac3fed59fa75b
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-10-7f4f04b87c7ef9653b4646949b24cf0b
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-10-7f4f04b87c7ef9653b4646949b24cf0b
b/sql/hive/src/test/resources/golden/nullformatCTAS-10-7f4f04b87c7ef9653b4646949b24cf0b
new file mode 100644
index 0000000..e74deff
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/nullformatCTAS-10-7f4f04b87c7ef9653b4646949b24cf0b
@@ -0,0 +1,10 @@
+1.0	1
+1.0	1
+1.0	1
+1.0	1
+1.0	1
+NULL	1
+NULL	NULL
+1.0	NULL
+1.0	1
+1.0	1

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-11-4a4c16b53c612d00012d338c97bf5281
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-11-4a4c16b53c612d00012d338c97bf5281
b/sql/hive/src/test/resources/golden/nullformatCTAS-11-4a4c16b53c612d00012d338c97bf5281
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-12-7f4f04b87c7ef9653b4646949b24cf0b
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-12-7f4f04b87c7ef9653b4646949b24cf0b
b/sql/hive/src/test/resources/golden/nullformatCTAS-12-7f4f04b87c7ef9653b4646949b24cf0b
new file mode 100644
index 0000000..00ebb52
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/nullformatCTAS-12-7f4f04b87c7ef9653b4646949b24cf0b
@@ -0,0 +1,10 @@
+1.0	1
+1.0	1
+1.0	1
+1.0	1
+1.0	1
+fooNull	1
+fooNull	fooNull
+1.0	fooNull
+1.0	1
+1.0	1

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-13-2e59caa113585495d8684fee69d88bc0
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-13-2e59caa113585495d8684fee69d88bc0
b/sql/hive/src/test/resources/golden/nullformatCTAS-13-2e59caa113585495d8684fee69d88bc0
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-14-ad9fe9d68c2cf492259af4f6167c1b12
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-14-ad9fe9d68c2cf492259af4f6167c1b12
b/sql/hive/src/test/resources/golden/nullformatCTAS-14-ad9fe9d68c2cf492259af4f6167c1b12
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-2-aa2bdbd93668dceae43d1a02f2ede68d
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-2-aa2bdbd93668dceae43d1a02f2ede68d
b/sql/hive/src/test/resources/golden/nullformatCTAS-2-aa2bdbd93668dceae43d1a02f2ede68d
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-3-b0057150f237050f38c1efa1f2d6b273
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-3-b0057150f237050f38c1efa1f2d6b273
b/sql/hive/src/test/resources/golden/nullformatCTAS-3-b0057150f237050f38c1efa1f2d6b273
new file mode 100644
index 0000000..b00bcb3
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/nullformatCTAS-3-b0057150f237050f38c1efa1f2d6b273
@@ -0,0 +1,6 @@
+a                   	string              	                    
+b                   	string              	                    
+c                   	string              	                    
+d                   	string              	                    
+	 	 
+Detailed Table Information	Table(tableName:base_tab, dbName:default, owner:animal, createTime:1423973915,
lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:string,
comment:null), FieldSchema(name:b, type:string, comment:null), FieldSchema(name:c, type:string,
comment:null), FieldSchema(name:d, type:string, comment:null)], location:file:/tmp/sparkHiveWarehouse2573474017665704744/base_tab,
inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat,
compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,
parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[],
skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[],
parameters:{numFiles=1, transient_lastDdlTime=1423973915, COLUMN_STATS_ACCURATE=true, tota
 lSize=130, numRows=0, rawDataSize=0}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)


http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-4-16c7086f39d6458b6c5cf2479f0473bd
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-4-16c7086f39d6458b6c5cf2479f0473bd
b/sql/hive/src/test/resources/golden/nullformatCTAS-4-16c7086f39d6458b6c5cf2479f0473bd
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-5-183d77b734ce6a373de5b3ebe1cd04c9
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-5-183d77b734ce6a373de5b3ebe1cd04c9
b/sql/hive/src/test/resources/golden/nullformatCTAS-5-183d77b734ce6a373de5b3ebe1cd04c9
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-6-159fff36b548e00ee952d1df8ef19833
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-6-159fff36b548e00ee952d1df8ef19833
b/sql/hive/src/test/resources/golden/nullformatCTAS-6-159fff36b548e00ee952d1df8ef19833
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-7-46900b082b02ce3e58087d1f41128f65
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-7-46900b082b02ce3e58087d1f41128f65
b/sql/hive/src/test/resources/golden/nullformatCTAS-7-46900b082b02ce3e58087d1f41128f65
new file mode 100644
index 0000000..264c973
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/nullformatCTAS-7-46900b082b02ce3e58087d1f41128f65
@@ -0,0 +1,4 @@
+a                   	string              	                    
+b                   	string              	                    
+	 	 
+Detailed Table Information	Table(tableName:null_tab3, dbName:default, owner:animal, createTime:1423973928,
lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:string,
comment:null), FieldSchema(name:b, type:string, comment:null)], location:file:/tmp/sparkHiveWarehouse2573474017665704744/null_tab3,
inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat,
compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,
parameters:{serialization.null.format=fooNull, serialization.format=1}), bucketCols:[], sortCols:[],
parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}),
storedAsSubDirectories:false), partitionKeys:[], parameters:{numFiles=1, transient_lastDdlTime=1423973928,
COLUMN_STATS_ACCURATE=true, totalSize=80, numRows=10, rawDataSize=70}, viewOriginalText:nul
 l, viewExpandedText:null, tableType:MANAGED_TABLE)	

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-8-7f26cbd6be5631a3acce26f667d1c5d8
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-8-7f26cbd6be5631a3acce26f667d1c5d8
b/sql/hive/src/test/resources/golden/nullformatCTAS-8-7f26cbd6be5631a3acce26f667d1c5d8
new file mode 100644
index 0000000..881917b
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/nullformatCTAS-8-7f26cbd6be5631a3acce26f667d1c5d8
@@ -0,0 +1,18 @@
+CREATE  TABLE `null_tab3`(
+  `a` string, 
+  `b` string)
+ROW FORMAT DELIMITED 
+  NULL DEFINED AS 'fooNull' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  'file:/tmp/sparkHiveWarehouse2573474017665704744/null_tab3'
+TBLPROPERTIES (
+  'numFiles'='1', 
+  'transient_lastDdlTime'='1423973928', 
+  'COLUMN_STATS_ACCURATE'='true', 
+  'totalSize'='80', 
+  'numRows'='10', 
+  'rawDataSize'='70')

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/src/test/resources/golden/nullformatCTAS-9-22e1b3899de7087b39c24d9d8f643b47
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/nullformatCTAS-9-22e1b3899de7087b39c24d9d8f643b47
b/sql/hive/src/test/resources/golden/nullformatCTAS-9-22e1b3899de7087b39c24d9d8f643b47
new file mode 100644
index 0000000..3a2e3f4
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/nullformatCTAS-9-22e1b3899de7087b39c24d9d8f643b47
@@ -0,0 +1 @@
+-1

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
----------------------------------------------------------------------
diff --git a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
index b5a0754..8534c7d 100644
--- a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
+++ b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
@@ -245,6 +245,8 @@ private[hive] object HiveShim {
   def prepareWritable(w: Writable): Writable = {
     w
   }
+
+  def setTblNullFormat(crtTbl: CreateTableDesc, tbl: Table) = {}
 }
 
 class ShimFileSinkDesc(var dir: String, var tableInfo: TableDesc, var compressed: Boolean)

http://git-wip-us.apache.org/repos/asf/spark/blob/275a0c08/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
----------------------------------------------------------------------
diff --git a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
index e4c1809..72104f5 100644
--- a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
+++ b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.Context
 import org.apache.hadoop.hive.ql.metadata.{Table, Hive, Partition}
 import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc}
 import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory
+import org.apache.hadoop.hive.serde.serdeConstants
 import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, DecimalTypeInfo, TypeInfoFactory}
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector,
PrimitiveObjectInspectorFactory}
 import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, ObjectInspector}
@@ -410,6 +411,12 @@ private[hive] object HiveShim {
     }
     w
   }
+
+  def setTblNullFormat(crtTbl: CreateTableDesc, tbl: Table) = {
+    if (crtTbl != null && crtTbl.getNullFormat() != null) {
+      tbl.setSerdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, crtTbl.getNullFormat())
+    }
+  }
 }
 
 /*


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message