spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gurwls...@apache.org
Subject spark git commit: [SPARK-23094][SPARK-23723][SPARK-23724][SQL][FOLLOW-UP] Support custom encoding for json files
Date Tue, 08 May 2018 13:24:43 GMT
Repository: spark
Updated Branches:
  refs/heads/master b54bbe57b -> 2f6fe7d67


[SPARK-23094][SPARK-23723][SPARK-23724][SQL][FOLLOW-UP] Support custom encoding for json files

## What changes were proposed in this pull request?
This is to add a test case to check the behavior when users write JSON in the specified UTF-16/UTF-32
encodings with multiline off.

## How was this patch tested?
N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #21254 from gatorsmile/followupSPARK-23094.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f6fe7d6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f6fe7d6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f6fe7d6

Branch: refs/heads/master
Commit: 2f6fe7d679a878ffd103cac6f06081c5b3888744
Parents: b54bbe5
Author: gatorsmile <gatorsmile@gmail.com>
Authored: Tue May 8 21:24:35 2018 +0800
Committer: hyukjinkwon <gurwls223@apache.org>
Committed: Tue May 8 21:24:35 2018 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/json/JSONOptions.scala    |  9 +++++----
 .../execution/datasources/json/JsonSuite.scala   | 19 +++++++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2f6fe7d6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 5f130af..2579374 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -110,11 +110,12 @@ private[sql] class JSONOptions(
       val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
       val isBlacklisted = blacklist.contains(Charset.forName(enc))
       require(multiLine || !isBlacklisted,
-        s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
-           | ${blacklist.mkString(", ")}""".stripMargin)
+        s"""The $enc encoding in the blacklist is not allowed when multiLine is disabled.
+          |Blacklist: ${blacklist.mkString(", ")}""".stripMargin)
+
+      val isLineSepRequired =
+        multiLine || Charset.forName(enc) == StandardCharsets.UTF_8 || lineSeparator.nonEmpty
 
-      val isLineSepRequired = !(multiLine == false &&
-        Charset.forName(enc) != StandardCharsets.UTF_8 && lineSeparator.isEmpty)
       require(isLineSepRequired, s"The lineSep option must be specified for the $enc encoding")
 
       enc

http://git-wip-us.apache.org/repos/asf/spark/blob/2f6fe7d6/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 0db688f..4b3921c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2313,6 +2313,25 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData
{
     }
   }
 
+  test("SPARK-23723: write json in UTF-16/32 with multiline off") {
+    Seq("UTF-16", "UTF-32").foreach { encoding =>
+      withTempPath { path =>
+        val ds = spark.createDataset(Seq(
+          ("a", 1), ("b", 2), ("c", 3))
+        ).repartition(2)
+        val e = intercept[IllegalArgumentException] {
+          ds.write
+            .option("encoding", encoding)
+            .option("multiline", "false")
+            .format("json").mode("overwrite")
+            .save(path.getCanonicalPath)
+        }.getMessage
+        assert(e.contains(
+          s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"))
+      }
+    }
+  }
+
   def checkReadJson(lineSep: String, encoding: String, inferSchema: Boolean, id: Int): Unit
= {
     test(s"SPARK-23724: checks reading json in ${encoding} #${id}") {
       val schema = new StructType().add("f1", StringType).add("f2", IntegerType)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message