spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zsxw...@apache.org
Subject spark git commit: [SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState
Date Fri, 17 Jun 2016 00:06:33 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8f7138859 -> b3678eb7e


[SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created
by SessionState

## What changes were proposed in this pull request?
Before this patch, after a SparkSession has been created, hadoop conf set directly to SparkContext.hadoopConfiguration
will not affect the hadoop conf created by SessionState. This patch makes the change to always
use SparkContext.hadoopConfiguration  as the base.

This patch also changes the behavior of hive-site.xml support added in https://github.com/apache/spark/pull/12689/.
With this patch, we will load hive-site.xml to SparkContext.hadoopConfiguration.

## How was this patch tested?
New test in SparkSessionBuilderSuite.

Author: Yin Huai <yhuai@databricks.com>

Closes #13711 from yhuai/SPARK-15991.

(cherry picked from commit d9c6628c47de547dc537310e3c775c7f3e0e4a12)
Signed-off-by: Shixiong Zhu <shixiong@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3678eb7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3678eb7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3678eb7

Branch: refs/heads/branch-2.0
Commit: b3678eb7e4ac6bb08ba8579867944ba42da99b81
Parents: 8f71388
Author: Yin Huai <yhuai@databricks.com>
Authored: Thu Jun 16 17:06:24 2016 -0700
Committer: Shixiong Zhu <shixiong@databricks.com>
Committed: Thu Jun 16 17:06:30 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/internal/SessionState.scala       |  2 +-
 .../apache/spark/sql/internal/SharedState.scala | 14 ++++----------
 .../org/apache/spark/sql/SQLQuerySuite.scala    |  4 ----
 .../spark/sql/SparkSessionBuilderSuite.scala    | 20 ++++++++++++++++++++
 .../apache/spark/sql/hive/HiveSharedState.scala |  5 +++--
 5 files changed, 28 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 59efa81..dc95123 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -49,7 +49,7 @@ private[sql] class SessionState(sparkSession: SparkSession) {
   lazy val conf: SQLConf = new SQLConf
 
   def newHadoopConf(): Configuration = {
-    val hadoopConf = new Configuration(sparkSession.sharedState.hadoopConf)
+    val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
     conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
     hadoopConf
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index bc349b4..6c43fe3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -43,23 +43,17 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends
Logging {
    */
   val listener: SQLListener = createListenerAndUI(sparkContext)
 
-  /**
-   * The base hadoop configuration which is shared among all spark sessions. It is based
on the
-   * default hadoop configuration of Spark, with custom configurations inside `hive-site.xml`.
-   */
-  val hadoopConf: Configuration = {
-    val conf = new Configuration(sparkContext.hadoopConfiguration)
+  {
     val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
     if (configFile != null) {
-      conf.addResource(configFile)
+      sparkContext.hadoopConfiguration.addResource(configFile)
     }
-    conf
   }
 
   /**
    * A catalog that interacts with external systems.
    */
-  lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(hadoopConf)
+  lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(sparkContext.hadoopConfiguration)
 
   /**
    * A classloader used to load all user-added jar.
@@ -71,7 +65,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging
{
     // Set the Hive metastore warehouse path to the one we use
     val tempConf = new SQLConf
     sparkContext.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) }
-    val hiveWarehouseDir = hadoopConf.get("hive.metastore.warehouse.dir")
+    val hiveWarehouseDir = sparkContext.hadoopConfiguration.get("hive.metastore.warehouse.dir")
     if (hiveWarehouseDir != null && !tempConf.contains(SQLConf.WAREHOUSE_PATH.key))
{
       // If hive.metastore.warehouse.dir is set and spark.sql.warehouse.dir is not set,
       // we will respect the value of hive.metastore.warehouse.dir.

http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 545c177..bbe821b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2870,8 +2870,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       sql(s"SELECT '$literal' AS DUMMY"),
       Row(s"$expected") :: Nil)
   }
-
-  test("SPARK-15887: hive-site.xml should be loaded") {
-    assert(spark.sessionState.newHadoopConf().get("hive.in.test") == "true")
-  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index 786956d..418345b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -102,4 +102,24 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
     assert(session.sparkContext.conf.get("key2") == "value2")
     session.stop()
   }
+
+  test("SPARK-15887: hive-site.xml should be loaded") {
+    val session = SparkSession.builder().master("local").getOrCreate()
+    assert(session.sessionState.newHadoopConf().get("hive.in.test") == "true")
+    assert(session.sparkContext.hadoopConfiguration.get("hive.in.test") == "true")
+    session.stop()
+  }
+
+  test("SPARK-15991: Set global Hadoop conf") {
+    val session = SparkSession.builder().master("local").getOrCreate()
+    val mySpecialKey = "my.special.key.15991"
+    val mySpecialValue = "msv"
+    try {
+      session.sparkContext.hadoopConfiguration.set(mySpecialKey, mySpecialValue)
+      assert(session.sessionState.newHadoopConf().get(mySpecialKey) == mySpecialValue)
+    } finally {
+      session.sparkContext.hadoopConfiguration.unset(mySpecialKey)
+      session.stop()
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
index 6b7a333..12b4962 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
@@ -36,11 +36,12 @@ private[hive] class HiveSharedState(override val sparkContext: SparkContext)
    */
   // This needs to be a lazy val at here because TestHiveSharedState is overriding it.
   lazy val metadataHive: HiveClient = {
-    HiveUtils.newClientForMetadata(sparkContext.conf, hadoopConf)
+    HiveUtils.newClientForMetadata(sparkContext.conf, sparkContext.hadoopConfiguration)
   }
 
   /**
    * A catalog that interacts with the Hive metastore.
    */
-  override lazy val externalCatalog = new HiveExternalCatalog(metadataHive, hadoopConf)
+  override lazy val externalCatalog =
+    new HiveExternalCatalog(metadataHive, sparkContext.hadoopConfiguration)
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message