carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chenliang...@apache.org
Subject [1/2] incubator-carbondata git commit: Added Carbon spark shell
Date Thu, 14 Jul 2016 14:36:00 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 3f56dfb2c -> 66b320b3c


Added Carbon spark shell

Added Carbon spark shell

Updated store and metadata location


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/6270d264
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/6270d264
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/6270d264

Branch: refs/heads/master
Commit: 6270d26441c8c49c87db487cbe6547be0330be5e
Parents: 3f56dfb
Author: ravipesala <ravi.pesala@gmail.com>
Authored: Mon Jul 11 11:16:30 2016 +0530
Committer: chenliang613 <chenliang613@apache.org>
Committed: Thu Jul 14 20:03:57 2016 +0530

----------------------------------------------------------------------
 bin/carbon-spark-shell                          | 115 +++++++++++++++++++
 integration/spark/pom.xml                       |   6 +
 .../apache/spark/repl/CarbonSparkILoop.scala    |  60 ++++++++++
 .../org/apache/spark/repl/carbon/Main.scala     |  33 ++++++
 .../spark/sql/hive/cli/CarbonSQLCLIDriver.scala |   2 +-
 5 files changed, 215 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6270d264/bin/carbon-spark-shell
----------------------------------------------------------------------
diff --git a/bin/carbon-spark-shell b/bin/carbon-spark-shell
new file mode 100755
index 0000000..1b7148f
--- /dev/null
+++ b/bin/carbon-spark-shell
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Shell script for starting the Spark Shell REPL
+
+cygwin=false
+case "`uname`" in
+  CYGWIN*) cygwin=true;;
+esac
+
+# Enter posix mode for bash
+set -o posix
+
+# spark-submit is run from $SPARK_HOME; the Carbondata assembly is located relative to this script.
+export _SPARK_CMD_USAGE="Usage: ./bin/carbon-spark-shell [options]"
+
+export FWDIR="${SPARK_HOME:?SPARK_HOME must be set to the Spark installation directory}"
+export CARBON_SOURCE="$(cd "`dirname "$0"`"/..; pwd)"
+ASSEMBLY_DIR="$CARBON_SOURCE/assembly/target/scala-2.10"
+GREP_OPTIONS=
+num_jars="$(ls -1 "$ASSEMBLY_DIR" 2>/dev/null | grep "^carbondata.*hadoop.*\.jar$" | wc -l)"
+if [ "$num_jars" -eq "0" ]; then  # also covers a missing assembly directory
+  echo "Failed to find Carbondata assembly in $ASSEMBLY_DIR." 1>&2
+  echo "You need to build Carbondata before running this program." 1>&2
+  exit 1
+fi
+ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^carbondata.*hadoop.*\.jar$" || true)"
+if [ "$num_jars" -gt "1" ]; then
+  echo "Found multiple Carbondata assembly jars in $ASSEMBLY_DIR:" 1>&2
+  echo "$ASSEMBLY_JARS" 1>&2
+  echo "Please remove all but one jar." 1>&2
+  exit 1
+fi
+
+ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
+export JAR="$ASSEMBLY_JAR"
+export SPARK_CLASSPATH=$SPARK_CLASSPATH:$JAR
+
+
+# SPARK-4161: scala does not assume use of the java classpath,
+# so we need to add the "-Dscala.usejavacp=true" flag manually. We
+# do this specifically for the Spark shell because the scala REPL
+# has its own class loader, and any additional classpath specified
+# through spark.driver.extraClassPath is not automatically propagated.
+SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dscala.usejavacp=true"
+
+# Start the Carbon REPL via spark-submit: caller options first, then name/class, then the assembly jar.
+function main() {
+  if $cygwin; then
+    # Workaround for issue involving JLine and Cygwin (see http://sourceforge.net/p/jline/bugs/40/).
+    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
+    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+    # (see https://github.com/sbt/sbt/issues/562).
+    stty -icanon min 1 -echo > /dev/null 2>&1
+    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
+    "$FWDIR"/bin/spark-submit "$@" --name "Carbon Spark shell" --class org.apache.spark.repl.carbon.Main "$JAR"
+    stty icanon echo > /dev/null 2>&1
+  else
+    export SPARK_SUBMIT_OPTS
+    "$FWDIR"/bin/spark-submit "$@" --name "Carbon Spark shell" --class org.apache.spark.repl.carbon.Main "$JAR"
+  fi
+}
+
+# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
+# binary distribution of Spark where Scala is not installed
+# State used by the exit handler: the status to exit with and the saved terminal settings.
+exit_status=127
+saved_stty=
+# Re-apply the terminal settings held in $saved_stty (echo in particular), then forget them.
+restoreSttySettings() {
+  stty $saved_stty
+  saved_stty=
+}
+
+function onExit() {
+  if [[ "$saved_stty" != "" ]]; then
+    restoreSttySettings
+  fi
+  exit $exit_status
+}
+
+# to reenable echo if we are interrupted before completing.
+trap onExit INT
+
+# save terminal settings; clear them on error so we don't later try to restore
+# them. The status of `stty -g` is tested directly: `[[ ! $? ]]` only checked
+# whether the text of $? was empty, which it never is.
+if ! saved_stty=$(stty -g 2>/dev/null); then
+  saved_stty=""
+fi
+
+main "$@"
+
+# record the exit status lest it be overwritten:
+# then reenable echo and propagate the code.
+exit_status=$?
+onExit
+

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6270d264/integration/spark/pom.xml
----------------------------------------------------------------------
diff --git a/integration/spark/pom.xml b/integration/spark/pom.xml
index f9f023e..e88ef6e 100644
--- a/integration/spark/pom.xml
+++ b/integration/spark/pom.xml
@@ -116,6 +116,12 @@
       <scope>${spark.deps.scope}</scope>
     </dependency>
     <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-repl_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>${spark.deps.scope}</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <version>4.11</version>

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6270d264/integration/spark/src/main/scala/org/apache/spark/repl/CarbonSparkILoop.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/repl/CarbonSparkILoop.scala b/integration/spark/src/main/scala/org/apache/spark/repl/CarbonSparkILoop.scala
new file mode 100644
index 0000000..aaed3d8
--- /dev/null
+++ b/integration/spark/src/main/scala/org/apache/spark/repl/CarbonSparkILoop.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.repl
+
+/** SparkILoop whose startup binds `sc` and a CarbonContext `cc` and runs the listed imports. */
+class CarbonSparkILoop extends SparkILoop {
+  override def initializeSpark(): Unit = intp.beQuietDuring {
+    Seq(
+      """
+         if(org.apache.spark.repl.carbon.Main.interp == null) {
+           org.apache.spark.repl.carbon.Main.main(Array[String]())
+         }
+              """,
+      "val i1 = org.apache.spark.repl.carbon.Main.interp",
+      "import i1._",
+      """
+         @transient val sc = {
+           val _sc = i1.createSparkContext()
+           println("Spark context available as sc.")
+           _sc
+         }
+              """,
+      "import org.apache.spark.SparkContext._",
+      "import org.apache.spark.sql.CarbonContext",
+      """
+         @transient val cc = {
+           val _cc = {
+             import java.io.File
+             val store = new File("./carbonshellstore")
+             store.mkdirs()
+             val storePath = sc.getConf.getOption("spark.carbon.storepath")
+                  .getOrElse(store.getCanonicalPath)
+             new CarbonContext(sc, storePath, store.getCanonicalPath)
+           }
+           println("Carbon context available as cc.")
+           _cc
+         }
+              """,
+      """cc.setConf("carbon.kettle.home", "../processing/carbonplugins")""",
+      "import cc.implicits._",
+      "import cc.sql",
+      "import org.apache.spark.sql.functions._"
+    ).foreach(line => command(line))
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6270d264/integration/spark/src/main/scala/org/apache/spark/repl/carbon/Main.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/repl/carbon/Main.scala b/integration/spark/src/main/scala/org/apache/spark/repl/carbon/Main.scala
new file mode 100644
index 0000000..d4ab362
--- /dev/null
+++ b/integration/spark/src/main/scala/org/apache/spark/repl/carbon/Main.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.repl.carbon
+
+import org.apache.spark.repl.{CarbonSparkILoop, SparkILoop}
+
+/** Entry point of the Carbon Spark shell; `interp` is the REPL loop, null until `main` runs. */
+object Main {
+  private var _interp: SparkILoop = _
+  def interp: SparkILoop = _interp
+  def interp_=(i: SparkILoop): Unit = { _interp = i }
+
+  def main(args: Array[String]): Unit = {
+    val loop = new CarbonSparkILoop
+    _interp = loop
+    loop.process(args)
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6270d264/integration/spark/src/main/scala/org/apache/spark/sql/hive/cli/CarbonSQLCLIDriver.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/cli/CarbonSQLCLIDriver.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/cli/CarbonSQLCLIDriver.scala
index 6406975..0dafe2d 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/cli/CarbonSQLCLIDriver.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/cli/CarbonSQLCLIDriver.scala
@@ -63,7 +63,7 @@ object CarbonSQLCLIDriver extends Logging {
 
       sparkContext = new SparkContext(sparkConf)
       sparkContext.addSparkListener(new StatsReportListener())
-      val store = new File("../carbonsqlclistore")
+      val store = new File("./carbonsqlclistore")
       store.mkdirs()
       hiveContext = new CarbonContext(sparkContext,
         maybeStorePath.getOrElse(store.getCanonicalPath),


Mime
View raw message