spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marmb...@apache.org
Subject spark git commit: [SQL] Make dataframe more tolerant of being serialized
Date Thu, 12 Feb 2015 03:06:08 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 bcb13827c -> 3c1b9bf65


[SQL] Make dataframe more tolerant of being serialized

Eases use in the spark-shell.

Author: Michael Armbrust <michael@databricks.com>

Closes #4545 from marmbrus/serialization and squashes the following commits:

04748e6 [Michael Armbrust] @scala.annotation.varargs
b36e219 [Michael Armbrust] moreFixes

(cherry picked from commit a38e23c30fb5d12f8f46a119d91a0620036e6800)
Signed-off-by: Michael Armbrust <michael@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3c1b9bf6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3c1b9bf6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3c1b9bf6

Branch: refs/heads/branch-1.3
Commit: 3c1b9bf65290cc1fd4444690a5c5c252667e4576
Parents: bcb1382
Author: Michael Armbrust <michael@databricks.com>
Authored: Wed Feb 11 19:05:49 2015 -0800
Committer: Michael Armbrust <michael@databricks.com>
Committed: Wed Feb 11 19:06:05 2015 -0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/DataFrame.scala     | 2 +-
 .../src/main/scala/org/apache/spark/sql/DataFrameImpl.scala | 4 ++--
 sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala      | 9 +++++++++
 .../src/main/scala/org/apache/spark/sql/SQLContext.scala    | 4 +++-
 4 files changed, 15 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3c1b9bf6/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 327cf87..13aff76 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -76,7 +76,7 @@ private[sql] object DataFrame {
  */
 // TODO: Improve documentation.
 @Experimental
-trait DataFrame extends RDDApi[Row] {
+trait DataFrame extends RDDApi[Row] with Serializable {
 
   val sqlContext: SQLContext
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3c1b9bf6/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
index 3863df5..4c6e19c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
@@ -44,8 +44,8 @@ import org.apache.spark.sql.types.{NumericType, StructType}
  * Internal implementation of [[DataFrame]]. Users of the API should use [[DataFrame]] directly.
  */
 private[sql] class DataFrameImpl protected[sql](
-    override val sqlContext: SQLContext,
-    val queryExecution: SQLContext#QueryExecution)
+    @transient override val sqlContext: SQLContext,
+    @transient val queryExecution: SQLContext#QueryExecution)
   extends DataFrame {
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/3c1b9bf6/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala
index 6bf21dd..7bc7683 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala
@@ -167,6 +167,15 @@ object Dsl {
   //////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
+   * Returns the first column that is not null.
+   * {{{
+   *   df.select(coalesce(df("a"), df("b")))
+   * }}}
+   */
+  @scala.annotation.varargs
+  def coalesce(e: Column*): Column = Coalesce(e.map(_.expr))
+
+  /**
    * Unary minus, i.e. negate the expression.
    * {{{
    *   // Select the amount column and negates all values.

http://git-wip-us.apache.org/repos/asf/spark/blob/3c1b9bf6/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index ca5e62f..8aae222 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -129,9 +129,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * A collection of methods that are considered experimental, but can be used to hook into
    * the query planner for advanced functionalities.
    */
+  @transient
   val experimental: ExperimentalMethods = new ExperimentalMethods(this)
 
   /** Returns a [[DataFrame]] with no rows or columns. */
+  @transient
   lazy val emptyDataFrame = DataFrame(this, NoRelation)
 
   /**
@@ -178,7 +180,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * (Scala-specific)
    * Implicit methods available in Scala for converting common Scala objects into [[DataFrame]]s.
    */
-  object implicits {
+  object implicits extends Serializable {
     // scalastyle:on
 
     /** Creates a DataFrame from an RDD of case classes or tuples. */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message