Updated Branches: refs/heads/master 46b28b00d -> 95bac804e BIGTOP-1030. Develop integration tests for new Spark component Signed-off-by: Konstantin Boudnik Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/95bac804 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/95bac804 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/95bac804 Branch: refs/heads/master Commit: 95bac804ea5f98c5d448e9a404604d825fb50af8 Parents: 46b28b0 Author: Henry Wang Authored: Tue Sep 10 16:41:43 2013 -0700 Committer: Konstantin Boudnik Committed: Tue Sep 10 16:41:43 2013 -0700 ---------------------------------------------------------------------- bigtop-tests/test-artifacts/pom.xml | 1 + bigtop-tests/test-artifacts/spark/pom.xml | 60 +++++++ .../bigtop/itest/spark/TestSparkSmoke.groovy | 98 +++++++++++ .../spark/src/main/resources/README.md | 73 ++++++++ .../spark/src/main/resources/kmeans_data.txt | 6 + .../test-execution/smokes/spark/pom.xml | 166 +++++++++++++++++++ pom.xml | 1 + 7 files changed, 405 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/bigtop-tests/test-artifacts/pom.xml ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/pom.xml b/bigtop-tests/test-artifacts/pom.xml index 9f46627..bcbdf59 100644 --- a/bigtop-tests/test-artifacts/pom.xml +++ b/bigtop-tests/test-artifacts/pom.xml @@ -48,6 +48,7 @@ datafu fatjar hcatalog + spark http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/bigtop-tests/test-artifacts/spark/pom.xml ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/spark/pom.xml b/bigtop-tests/test-artifacts/spark/pom.xml new file mode 100644 index 0000000..a29f87f --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/pom.xml @@ -0,0 +1,60 @@ + + + 4.0.0 + + + 
org.apache.bigtop.itest + bigtop-smokes + 0.7.0-SNAPSHOT + ../pom.xml + + + org.apache.bigtop.itest + spark-smoke + 0.7.0-SNAPSHOT + sparksmoke + + + + akka-repo + Akka Repository + http://repo.akka.io/releases/ + + true + + + false + + + + spray-repo + Spray Repository + http://repo.spray.cc/ + + true + + + false + + + + + + + org.spark-project + spark-core + 0.8.0-SNAPSHOT + hadoop2-yarn + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.hadoop + hadoop-common + + + http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy new file mode 100644 index 0000000..414ec9c --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/groovy/org/apache/bigtop/itest/spark/TestSparkSmoke.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.bigtop.itest.spark
+
+import org.apache.bigtop.itest.shell.Shell
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+
+import org.junit.Test
+import org.junit.BeforeClass
+import static org.junit.Assert.assertEquals
+
+import static org.apache.bigtop.itest.LogErrorsUtils.logError
+
+import spark.api.java.*
+import spark.api.java.function.Function
+
+/**
+ * Smoke tests for a Spark installation.
+ *
+ * Two tests run the JavaWordCount example through ${SPARK_HOME}/spark-class
+ * (once on a local file, once on a file copied to HDFS) and compare the
+ * shell output with the expected list. The third test runs a small
+ * filter/count job in-process through JavaSparkContext.
+ *
+ * SPARK_HOME and SPARK_MASTER are read from the environment.
+ *
+ * NOTE(review): the class is Serializable presumably because the anonymous
+ * Function instances in JobTest capture the enclosing test instance;
+ * confirm before removing it.
+ */
+public class TestSparkSmoke implements Serializable {
+
+  private static String SPARK_HOME = System.getenv("SPARK_HOME")
+  private static String SPARK_MASTER = System.getenv("SPARK_MASTER")
+  private static String USER = System.getProperty("user.name")
+  // Working directory of the test shell; filled in by setUp().
+  private static String pwd = ""
+  private static Configuration conf
+  static Shell sh = new Shell("/bin/bash -s")
+  // Expected shell output of JavaWordCount for kmeans_data.txt.
+  def result = ["0.2: 3", "0.1: 3", "0.0: 3", "9.0: 3", "9.2: 3", "9.1: 3"]
+
+  /**
+   * Records the directory the test shell runs in.
+   *
+   * The shell output is treated as a list of lines (the assertions below
+   * compare it with a list), so the first line is taken directly instead of
+   * stringifying the whole list and cutting off the surrounding brackets.
+   */
+  @BeforeClass
+  static void setUp() {
+    sh.exec("pwd")
+    pwd = sh.out ? sh.out[0] : ""
+  }
+
+  /** Runs JavaWordCount with the "local" master on the local kmeans_data.txt. */
+  @Test
+  void ShellTest() {
+    String kmeans = "file://" + pwd + "/kmeans_data.txt"
+    sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount local " + kmeans)
+    logError(sh)
+    assertEquals(result, sh.out)
+  }
+
+  /**
+   * Copies kmeans_data.txt to /user/${USER} on the default file system and
+   * runs JavaWordCount on it against ${SPARK_MASTER}.
+   */
+  @Test
+  public void HDFSTest() {
+    conf = new Configuration()
+    String fs_default_name = conf.get("fs.defaultFS")
+    String pathname = "/user/${USER}/kmeans_data.txt"
+    FileSystem fs = FileSystem.get(conf)
+    try {
+      fs.copyFromLocalFile(new Path("kmeans_data.txt"), new Path(pathname))
+    } finally {
+      // Close the handle even when the copy fails.
+      fs.close()
+    }
+
+    String dfsname = fs_default_name + pathname
+    sh.exec("cd ${SPARK_HOME} && ./spark-class org.apache.spark.examples.JavaWordCount ${SPARK_MASTER} " + dfsname)
+    logError(sh)
+    assertEquals(result, sh.out)
+  }
+
+  /**
+   * Runs an in-process job with the "local" master that counts the lines of
+   * README.md containing "Spark" and the lines containing "e".
+   */
+  @Test
+  public void JobTest() {
+    String logFile = "file://" + pwd + "/README.md";
+    String[] jars = [System.getProperty("sparkJar"), org.apache.bigtop.itest.JarContent.getJarURL("groovy.lang.GroovyObject")];
+
+    JavaSparkContext sc = new JavaSparkContext("local", "Simple Job",
+        SPARK_HOME, jars);
+    try {
+      JavaRDD<String> logData = sc.textFile(logFile).cache();
+
+      long num_Spark = logData.filter(new Function<String, Boolean>() {
+        public Boolean call(String s) { return s.contains("Spark"); }
+      }).count();
+
+      long num_e = logData.filter(new Function<String, Boolean>() {
+        public Boolean call(String s) { return s.contains("e"); }
+      }).count();
+
+      // JUnit expects (message, expected, actual); the expected value goes first
+      // so that a failure reports the two numbers the right way round.
+      assertEquals("Lines containing 'Spark' should be 14", 14L, num_Spark);
+      assertEquals("Lines containing 'e' should be 43", 43L, num_e);
+    } finally {
+      // Always shut the context down, also when an assertion fails.
+      sc.stop();
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/bigtop-tests/test-artifacts/spark/src/main/resources/README.md ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/README.md b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md new file mode 100644 index 0000000..ba24ab4 --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/resources/README.md @@ -0,0 +1,73 @@ +# Spark + +Lightning-Fast Cluster Computing - + + +## Online Documentation + +You can find the latest Spark documentation, including a programming +guide, on the project webpage at . +This README file only contains basic setup instructions. + + +## Building + +Spark requires Scala 2.9.2 (Scala 2.10 is not yet supported). The project is +built using Simple Build Tool (SBT), which is packaged with it. To build +Spark and its example programs, run: + + sbt/sbt package + +Spark also supports building using Maven. If you would like to build using Maven, +see the [instructions for building Spark with Maven](http://spark-project.org/docs/latest/building-with-maven.html) +in the spark documentation.. + +To run Spark, you will need to have Scala's bin directory in your `PATH`, or +you will need to set the `SCALA_HOME` environment variable to point to where +you've installed Scala.
Scala must be accessible through one of these +methods on your cluster's worker nodes as well as its master. + +To run one of the examples, use `./run `. For example: + + ./run spark.examples.SparkLR local[2] + +will run the Logistic Regression example locally on 2 CPUs. + +Each of the example programs prints usage help if no params are given. + +All of the Spark samples take a `` parameter that is the cluster URL +to connect to. This can be a mesos:// or spark:// URL, or "local" to run +locally with one thread, or "local[N]" to run locally with N threads. + + +## A Note About Hadoop Versions + +Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported +storage systems. Because the HDFS API has changed in different versions of +Hadoop, you must build Spark against the same version that your cluster runs. +You can change the version by setting the `HADOOP_VERSION` variable at the top +of `project/SparkBuild.scala`, then rebuilding Spark. + + +## Configuration + +Please refer to the "Configuration" guide in the online documentation for a +full overview on how to configure Spark. At the minimum, you will need to +create a `conf/spark-env.sh` script (copy `conf/spark-env.sh.template`) and +set the following two variables: + +- `SCALA_HOME`: Location where Scala is installed. + +- `MESOS_NATIVE_LIBRARY`: Your Mesos library (only needed if you want to run + on Mesos). For example, this might be `/usr/local/lib/libmesos.so` on Linux. + + +## Contributing to Spark + +Contributions via GitHub pull requests are gladly accepted from their original +author. Along with any pull requests, please state that the contribution is +your original work and that you license the work to the project under the +project's open source license. 
Whether or not you state this explicitly, by +submitting any copyrighted material via pull request, email, or other means +you agree to license the material under the project's open source license and +warrant that you have the legal authority to do so. http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt new file mode 100644 index 0000000..338664f --- /dev/null +++ b/bigtop-tests/test-artifacts/spark/src/main/resources/kmeans_data.txt @@ -0,0 +1,6 @@ +0.0 0.0 0.0 +0.1 0.1 0.1 +0.2 0.2 0.2 +9.0 9.0 9.0 +9.1 9.1 9.1 +9.2 9.2 9.2 http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/bigtop-tests/test-execution/smokes/spark/pom.xml ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-execution/smokes/spark/pom.xml b/bigtop-tests/test-execution/smokes/spark/pom.xml new file mode 100644 index 0000000..e7a80d9 --- /dev/null +++ b/bigtop-tests/test-execution/smokes/spark/pom.xml @@ -0,0 +1,166 @@ + + + + 4.0.0 + + + org.apache.bigtop.itest + smoke-tests + 0.7.0-SNAPSHOT + ../pom.xml + + + org.apache.bigtop.itest + spark-smoke-execution + 0.7.0-SNAPSHOT + Spark smoke test execution + + + ${env.SPARK_HOME} + ${env.SPARK_MASTER} + org.apache.bigtop.itest + spark-smoke + ${spark-smoke.version} + ${project.build.directory} + + **/* + jar + + + + + akka-repo + Akka Repository + http://repo.akka.io/releases/ + + true + + + false + + + + spray-repo + Spray Repository + http://repo.spray.cc/ + + true + + + false + + + + + + + commons-logging + commons-logging + 1.1 + test + + + org.apache.cxf + cxf-rt-frontend-jaxrs + 2.5.0 + + + org.apache.hadoop + hadoop-common + + + asm + asm + + + + + org.apache.hadoop + hadoop-hdfs + + + 
org.apache.hadoop + hadoop-mapreduce-client-jobclient + ${hadoop.version} + + + + ${org.apache.maven-dependency-plugin.groupId} + ${org.apache.maven-dependency-plugin.artifactId} + ${spark-smoke.version} + + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + + org.apache.maven.plugins + maven-failsafe-plugin + 2.11 + + always + + + ${project.build.directory}/${org.apache.maven-dependency-plugin.artifactId}-${org.apache.maven-dependency-plugin.version}.${org.apache.maven-dependency-plugin.type} + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.0 + + + enforce-property + + enforce + + + + + SPARK_HOME + SPARK_HOME env. variable has to be set + + + SPARK_MASTER + SPARK_MASTER env. variable has to be set + + + HADOOP_CONF_DIR + HADOOP_CONF_DIR env. variable has to be set + + + true + + + + + + + + http://git-wip-us.apache.org/repos/asf/bigtop/blob/95bac804/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index f89b3fa..0ac1d49 100644 --- a/pom.xml +++ b/pom.xml @@ -44,6 +44,7 @@ 3.4.5 0.2-SNAPSHOT 4.2.1 + ${project.version} ${project.version}