spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-15585][SQL] Add doc for turning off quotations
Date Sat, 11 Jun 2016 22:12:23 GMT
Repository: spark
Updated Branches:
  refs/heads/master ad102af16 -> cb5d933d8


[SPARK-15585][SQL] Add doc for turning off quotations

## What changes were proposed in this pull request?
This PR adds documentation on how to turn off quotations, because this behavior differs from `com.databricks.spark.csv`.

## How was this patch tested?
Checked the behavior when an empty string is set in the CSV options.

Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>

Closes #13616 from maropu/SPARK-15585-2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cb5d933d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cb5d933d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cb5d933d

Branch: refs/heads/master
Commit: cb5d933d86ac4afd947874f1f1c31c7154cb8249
Parents: ad102af
Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>
Authored: Sat Jun 11 15:12:21 2016 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Sat Jun 11 15:12:21 2016 -0700

----------------------------------------------------------------------
 python/pyspark/sql/readwriter.py                          |  6 ++++--
 .../main/scala/org/apache/spark/sql/DataFrameReader.scala |  4 +++-
 .../spark/sql/execution/datasources/csv/CSVSuite.scala    | 10 ++++++++++
 3 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/cb5d933d/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 9208a52..7d1f186 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -320,7 +320,8 @@ class DataFrameReader(object):
                          it uses the default value, ``UTF-8``.
         :param quote: sets the single character used for escaping quoted values where the
                       separator can be part of the value. If None is set, it uses the default
-                      value, ``"``.
+                      value, ``"``. If you would like to turn off quotations, you need to set an
+                      empty string.
         :param escape: sets the single character used for escaping quotes inside an already
                        quoted value. If None is set, it uses the default value, ``\``.
         :param comment: sets the single character used for skipping lines beginning with this
@@ -804,7 +805,8 @@ class DataFrameWriter(object):
                     set, it uses the default value, ``,``.
         :param quote: sets the single character used for escaping quoted values where the
                       separator can be part of the value. If None is set, it uses the default
-                      value, ``"``.
+                      value, ``"``. If you would like to turn off quotations, you need to set an
+                      empty string.
         :param escape: sets the single character used for escaping quotes inside an already
                        quoted value. If None is set, it uses the default value, ``\``
         :param escapeQuotes: A flag indicating whether values containing quotes should always

http://git-wip-us.apache.org/repos/asf/spark/blob/cb5d933d/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index b248583..bb5fa2b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding
    * type.</li>
    * <li>`quote` (default `"`): sets the single character used for escaping quoted values where
-   * the separator can be part of the value.</li>
+   * the separator can be part of the value. If you would like to turn off quotations, you need to
+   * set not `null` but an empty string. This behaviour is different from
+   * `com.databricks.spark.csv`.</li>
    * <li>`escape` (default `\`): sets the single character used for escaping quotes inside
    * an already quoted value.</li>
    * <li>`comment` (default empty string): sets the single character used for skipping lines

http://git-wip-us.apache.org/repos/asf/spark/blob/cb5d933d/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index bc95446..f170065 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
       assert(msg.contains("CSV data source does not support array<string> data type"))
     }
   }
+
+  test("SPARK-15585 turn off quotations") {
+    val cars = spark.read
+      .format("csv")
+      .option("header", "true")
+      .option("quote", "")
+      .load(testFile(carsUnbalancedQuotesFile))
+
+    verifyCars(cars, withHeader = true, checkValues = false)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message