spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject spark git commit: [SPARK-11997] [SQL] NPE when saving a DataFrame as parquet partitioned by a long column
Date Fri, 27 Nov 2015 05:04:54 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 d2a5a4930 -> bb3fe0a64


[SPARK-11997] [SQL] NPE when saving a DataFrame as parquet partitioned by a long column

Check for partition-column nullability while building the partition spec.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #10001 from dilipbiswal/spark-11997.

(cherry picked from commit a374e20b5492c775f20d32e8fbddadbd8098a655)
Signed-off-by: Davies Liu <davies.liu@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bb3fe0a6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bb3fe0a6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bb3fe0a6

Branch: refs/heads/branch-1.6
Commit: bb3fe0a646055fcc3a4fa14ff0df14dec5508393
Parents: d2a5a49
Author: Dilip Biswal <dbiswal@us.ibm.com>
Authored: Thu Nov 26 21:04:40 2015 -0800
Committer: Davies Liu <davies.liu@gmail.com>
Committed: Thu Nov 26 21:04:50 2015 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/sources/interfaces.scala      |  2 +-
 .../datasources/parquet/ParquetQuerySuite.scala        | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/bb3fe0a6/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index f946515..9ace25d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -607,7 +607,7 @@ abstract class HadoopFsRelation private[sql](
         def castPartitionValuesToUserSchema(row: InternalRow) = {
           InternalRow((0 until row.numFields).map { i =>
             Cast(
-              Literal.create(row.getString(i), StringType),
+              Literal.create(row.getUTF8String(i), StringType),
               userProvidedSchema.fields(i).dataType).eval()
           }: _*)
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/bb3fe0a6/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 70fae32..f777e97 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -252,6 +252,19 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
     }
   }
 
+  test("SPARK-11997 parquet with null partition values") {
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      sqlContext.range(1, 3)
+        .selectExpr("if(id % 2 = 0, null, id) AS n", "id")
+        .write.partitionBy("n").parquet(path)
+
+      checkAnswer(
+        sqlContext.read.parquet(path).filter("n is null"),
+        Row(2, null))
+    }
+  }
+
   // This test case is ignored because of parquet-mr bug PARQUET-370
   ignore("SPARK-10301 requested schema clipping - schemas with disjoint sets of fields")
{
     withTempPath { dir =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message