spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marmb...@apache.org
Subject spark git commit: [SPARK-5872] [SQL] create a sqlCtx in pyspark shell
Date Tue, 17 Feb 2015 23:44:48 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 cb061603c -> 0dba382ee


[SPARK-5872] [SQL] create a sqlCtx in pyspark shell

The sqlCtx will be a HiveContext if Hive is built into the assembly jar, or a SQLContext if not.

It also skips the Hive tests in pyspark.sql.tests if Hive is not available.

Author: Davies Liu <davies@databricks.com>

Closes #4659 from davies/sqlctx and squashes the following commits:

0e6629a [Davies Liu] sqlCtx in pyspark

(cherry picked from commit 4d4cc760fa9687ce563320094557ef9144488676)
Signed-off-by: Michael Armbrust <michael@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0dba382e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0dba382e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0dba382e

Branch: refs/heads/branch-1.3
Commit: 0dba382ee65694969704384c4968e3a656b3c833
Parents: cb06160
Author: Davies Liu <davies@databricks.com>
Authored: Tue Feb 17 15:44:37 2015 -0800
Committer: Michael Armbrust <michael@databricks.com>
Committed: Tue Feb 17 15:44:45 2015 -0800

----------------------------------------------------------------------
 python/pyspark/shell.py     | 13 ++++++++++++-
 python/pyspark/sql/tests.py | 12 ++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0dba382e/python/pyspark/shell.py
----------------------------------------------------------------------
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index 4cf4b89..1a02fec 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -31,8 +31,12 @@ if sys.version_info[0] != 2:
 import atexit
 import os
 import platform
+
+import py4j
+
 import pyspark
 from pyspark.context import SparkContext
+from pyspark.sql import SQLContext, HiveContext
 from pyspark.storagelevel import StorageLevel
 
 # this is the deprecated equivalent of ADD_JARS
@@ -46,6 +50,13 @@ if os.environ.get("SPARK_EXECUTOR_URI"):
 sc = SparkContext(appName="PySparkShell", pyFiles=add_files)
 atexit.register(lambda: sc.stop())
 
+try:
+    # Try to access HiveConf, it will raise exception if Hive is not added
+    sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+    sqlCtx = HiveContext(sc)
+except py4j.protocol.Py4JError:
+    sqlCtx = SQLContext(sc)
+
 print("""Welcome to
       ____              __
      / __/__  ___ _____/ /__
@@ -57,7 +68,7 @@ print("Using Python version %s (%s, %s)" % (
     platform.python_version(),
     platform.python_build()[0],
     platform.python_build()[1]))
-print("SparkContext available as sc.")
+print("SparkContext available as sc, %s available as sqlCtx." % sqlCtx.__class__.__name__)
 
 if add_files is not None:
     print("Warning: ADD_FILES environment variable is deprecated, use --py-files argument
instead")

http://git-wip-us.apache.org/repos/asf/spark/blob/0dba382e/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index aa80bca..52f7e65 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -25,6 +25,8 @@ import pydoc
 import shutil
 import tempfile
 
+import py4j
+
 if sys.version_info[:2] <= (2, 6):
     try:
         import unittest2 as unittest
@@ -329,9 +331,12 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
     def setUpClass(cls):
         ReusedPySparkTestCase.setUpClass()
         cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
+        try:
+            cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+        except py4j.protocol.Py4JError:
+            cls.sqlCtx = None
+            return
         os.unlink(cls.tempdir.name)
-        print "type", type(cls.sc)
-        print "type", type(cls.sc._jsc)
         _scala_HiveContext =\
             cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc())
         cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext)
@@ -344,6 +349,9 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
         shutil.rmtree(cls.tempdir.name, ignore_errors=True)
 
     def test_save_and_load_table(self):
+        if self.sqlCtx is None:
+            return  # no hive available, skipped
+
         df = self.df
         tmpPath = tempfile.mkdtemp()
         shutil.rmtree(tmpPath)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message