From: moon@apache.org
To: commits@zeppelin.apache.org
Reply-To: dev@zeppelin.apache.org
Subject: zeppelin git commit: [ZEPPELIN-1034] Add Spark Interpreter option to not import implicits
Date: Fri, 24 Jun 2016 07:36:13 +0000 (UTC)

Repository: zeppelin
Updated Branches:
  refs/heads/branch-0.6 25b2cd20f -> 024bad9ba

[ZEPPELIN-1034] Add Spark Interpreter option to not import implicits

### What is this PR for?
This PR adds the option `zeppelin.spark.importImplicit` to skip importing the `SQLContext` implicits and UDF collections.
It defaults to `true`, meaning the implicits are imported, which has been the default behaviour.

### What type of PR is it?
Feature

### Todos

### What is the Jira issue?
https://issues.apache.org/jira/browse/ZEPPELIN-1034

### How should this be tested?
Added unit tests; also tested manually with a scenario similar to the unit tests.

### Screenshots (if appropriate)

### Questions:
* Does this need documentation? Documentation is updated to include a description of the new option.

Author: sadikovi

Closes #1049 from sadikovi/ZEPPELIN-1034 and squashes the following commits:

772ea78 [sadikovi] update tests
4aabdef [sadikovi] add option to import implicits

(cherry picked from commit 878a8c76cfc754974f35fbb5e74ebe3821152d7f)
Signed-off-by: Lee moon soo

Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/024bad9b
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/024bad9b
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/024bad9b

Branch: refs/heads/branch-0.6
Commit: 024bad9ba2f5844de7233b817bab94ecba18b41b
Parents: 25b2cd2
Author: sadikovi
Authored: Tue Jun 21 18:00:26 2016 +1200
Committer: Lee moon soo
Committed: Fri Jun 24 00:38:09 2016 -0700

----------------------------------------------------------------------
 conf/zeppelin-env.cmd.template                  |  3 +-
 conf/zeppelin-env.sh.template                   |  5 +--
 docs/interpreter/spark.md                       |  7 ++++-
 .../apache/zeppelin/spark/SparkInterpreter.java | 20 +++++++-----
 .../src/main/resources/interpreter-setting.json |  6 ++++
 .../zeppelin/spark/SparkInterpreterTest.java    | 32 ++++++++++++++++++++
 6 files changed, 62 insertions(+), 11 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/024bad9b/conf/zeppelin-env.cmd.template
----------------------------------------------------------------------
diff --git a/conf/zeppelin-env.cmd.template b/conf/zeppelin-env.cmd.template
index 59953dc..d85e59f 100644
--- a/conf/zeppelin-env.cmd.template
+++ b/conf/zeppelin-env.cmd.template
@@ -55,12 +55,13 @@
 REM set HADOOP_CONF_DIR             REM yarn-site.xml is located in configuration
 REM Pyspark (supported with Spark 1.2.1 and above)
 REM To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
 REM set PYSPARK_PYTHON              REM path to the python command. must be the same path on the driver(Zeppelin) and all workers.
-REM set PYTHONPATH
+REM set PYTHONPATH

 REM Spark interpreter options
 REM
 REM set ZEPPELIN_SPARK_USEHIVECONTEXT   REM Use HiveContext instead of SQLContext if set true. true by default.
 REM set ZEPPELIN_SPARK_CONCURRENTSQL    REM Execute multiple SQL concurrently if set true. false by default.
+REM set ZEPPELIN_SPARK_IMPORTIMPLICIT   REM Import implicits, UDF collection, and sql if set true. true by default.
 REM set ZEPPELIN_SPARK_MAXRESULT        REM Max number of SparkSQL result to display. 1000 by default.

 REM ZeppelinHub connection configuration

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/024bad9b/conf/zeppelin-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template
index be6f3dd..52e36f7 100644
--- a/conf/zeppelin-env.sh.template
+++ b/conf/zeppelin-env.sh.template
@@ -55,15 +55,17 @@
 # Pyspark (supported with Spark 1.2.1 and above)
 # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
 # export PYSPARK_PYTHON          # path to the python command. must be the same path on the driver(Zeppelin) and all workers.
-# export PYTHONPATH
+# export PYTHONPATH

 ## Spark interpreter options ##
 ##
 # export ZEPPELIN_SPARK_USEHIVECONTEXT   # Use HiveContext instead of SQLContext if set true. true by default.
 # export ZEPPELIN_SPARK_CONCURRENTSQL    # Execute multiple SQL concurrently if set true. false by default.
+# export ZEPPELIN_SPARK_IMPORTIMPLICIT  # Import implicits, UDF collection, and sql if set true. true by default.
 # export ZEPPELIN_SPARK_MAXRESULT       # Max number of SparkSQL result to display. 1000 by default.

 # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE       # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
+
 #### HBase interpreter configuration ####

 ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
@@ -75,4 +77,3 @@
 # export ZEPPELINHUB_API_ADDRESS        # Refers to the address of the ZeppelinHub service in use
 # export ZEPPELINHUB_API_TOKEN          # Refers to the Zeppelin instance token of the user
 # export ZEPPELINHUB_USER_KEY           # Optional, when using Zeppelin with authentication.
-

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/024bad9b/docs/interpreter/spark.md
----------------------------------------------------------------------
diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md
index 30da219..df5e831 100644
--- a/docs/interpreter/spark.md
+++ b/docs/interpreter/spark.md
@@ -45,7 +45,7 @@ Spark Interpreter group, which consists of five interpreters.

 ## Configuration
-The Spark interpreter can be configured with properties provided by Zeppelin.
+The Spark interpreter can be configured with properties provided by Zeppelin.
 You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to [Spark Available Properties](http://spark.apache.org/docs/latest/configuration.html#available-properties).
@@ -111,6 +111,11 @@ You can also set other Spark properties which are not listed in the table. For a
     <td>true</td>
     <td>Use HiveContext instead of SQLContext if it is true.</td>
   </tr>
+  <tr>
+    <td>zeppelin.spark.importImplicit</td>
+    <td>true</td>
+    <td>Import implicits, UDF collection, and sql if set true.</td>
+  </tr>
 Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you'll need to follow below two simple steps.

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/024bad9b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
----------------------------------------------------------------------
diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
index 0bbe418..6783378 100644
--- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
@@ -176,6 +176,10 @@ public class SparkInterpreter extends Interpreter {
     return java.lang.Boolean.parseBoolean(getProperty("zeppelin.spark.useHiveContext"));
   }

+  private boolean importImplicit() {
+    return java.lang.Boolean.parseBoolean(getProperty("zeppelin.spark.importImplicit"));
+  }
+
   public SQLContext getSQLContext() {
     synchronized (sharedInterpreterLock) {
       if (sqlc == null) {
@@ -250,7 +254,7 @@ public class SparkInterpreter extends Interpreter {
           | IllegalArgumentException | InvocationTargetException e) {
         // continue instead of: throw new InterpreterException(e);
         // Newer Spark versions (like the patched CDH5.7.0 one) don't contain this method
-        logger.warn(String.format("Spark method classServerUri not available due to: [%s]",
+        logger.warn(String.format("Spark method classServerUri not available due to: [%s]",
             e.getMessage()));
       }
     }
@@ -540,12 +544,14 @@ public class SparkInterpreter extends Interpreter {
           + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");

       intp.interpret("import org.apache.spark.SparkContext._");
-      if (sparkVersion.oldSqlContextImplicits()) {
-        intp.interpret("import sqlContext._");
-      } else {
-        intp.interpret("import sqlContext.implicits._");
-        intp.interpret("import sqlContext.sql");
-        intp.interpret("import org.apache.spark.sql.functions._");
+      if (importImplicit()) {
+        if (sparkVersion.oldSqlContextImplicits()) {
+          intp.interpret("import sqlContext._");
+        } else {
+          intp.interpret("import sqlContext.implicits._");
+          intp.interpret("import sqlContext.sql");
+          intp.interpret("import org.apache.spark.sql.functions._");
+        }
       }
     }

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/024bad9b/spark/src/main/resources/interpreter-setting.json
----------------------------------------------------------------------
diff --git a/spark/src/main/resources/interpreter-setting.json b/spark/src/main/resources/interpreter-setting.json
index 1d36a29..d46801b 100644
--- a/spark/src/main/resources/interpreter-setting.json
+++ b/spark/src/main/resources/interpreter-setting.json
@@ -77,6 +77,12 @@
         "propertyName": "zeppelin.spark.maxResult",
         "defaultValue": "1000",
         "description": "Max number of SparkSQL result to display."
+      },
+      "zeppelin.spark.importImplicit": {
+        "envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",
+        "propertyName": "zeppelin.spark.importImplicit",
+        "defaultValue": "true",
+        "description": "Import implicits, UDF collection, and sql if set true. true by default."
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/024bad9b/spark/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
----------------------------------------------------------------------
diff --git a/spark/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java b/spark/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
index 409f938..eb8d876 100644
--- a/spark/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
+++ b/spark/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
@@ -69,6 +69,7 @@ public class SparkInterpreterTest {
     p.setProperty("spark.app.name", "Zeppelin Test");
     p.setProperty("zeppelin.spark.useHiveContext", "true");
     p.setProperty("zeppelin.spark.maxResult", "1000");
+    p.setProperty("zeppelin.spark.importImplicit", "true");
     return p;
   }
@@ -228,4 +229,35 @@ public class SparkInterpreterTest {
     repl2.close();
   }
+
+  @Test
+  public void testEnableImplicitImport() {
+    // Set option of importing implicits to "true", and initialize new Spark repl
+    Properties p = getSparkTestProperties();
+    p.setProperty("zeppelin.spark.importImplicit", "true");
+    SparkInterpreter repl2 = new SparkInterpreter(p);
+    repl2.setInterpreterGroup(intpGroup);
+    intpGroup.get("note").add(repl2);
+
+    repl2.open();
+    String ddl = "val df = Seq((1, true), (2, false)).toDF(\"num\", \"bool\")";
+    assertEquals(Code.SUCCESS, repl2.interpret(ddl, context).code());
+    repl2.close();
+  }
+
+  @Test
+  public void testDisableImplicitImport() {
+    // Set option of importing implicits to "false", and initialize new Spark repl
+    // this test should return error status when creating DataFrame from sequence
+    Properties p = getSparkTestProperties();
+    p.setProperty("zeppelin.spark.importImplicit", "false");
+    SparkInterpreter repl2 = new SparkInterpreter(p);
+    repl2.setInterpreterGroup(intpGroup);
+    intpGroup.get("note").add(repl2);
+
+    repl2.open();
+    String ddl = "val df = Seq((1, true), (2, false)).toDF(\"num\", \"bool\")";
+    assertEquals(Code.ERROR, repl2.interpret(ddl, context).code());
+    repl2.close();
+  }
 }
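One detail of the patch worth spelling out: `importImplicit()` uses `Boolean.parseBoolean`, which returns `false` when the property is absent (`parseBoolean(null)` is `false`). The documented "true by default" behaviour therefore relies on the `defaultValue` in `interpreter-setting.json` populating the property before the interpreter reads it. A minimal standalone sketch of that parsing pattern (the class and method names here are illustrative, not part of Zeppelin):

```java
import java.util.Properties;

public class ImportImplicitSketch {
    // Mirrors the parsing pattern of SparkInterpreter.importImplicit():
    // interpret the property value as a boolean.
    static boolean importImplicit(Properties props) {
        return Boolean.parseBoolean(props.getProperty("zeppelin.spark.importImplicit"));
    }

    public static void main(String[] args) {
        Properties p = new Properties();
        // Unset property: Boolean.parseBoolean(null) yields false, so the
        // "true by default" behaviour depends on interpreter-setting.json
        // supplying defaultValue = "true" before this code runs.
        System.out.println(importImplicit(p)); // false

        p.setProperty("zeppelin.spark.importImplicit", "true");
        System.out.println(importImplicit(p)); // true
    }
}
```

This also explains why the test setup in `getSparkTestProperties()` sets the property explicitly: without it, a bare `Properties` object would silently disable the implicit imports.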