spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From andrewor14 <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-4924] Add a library for launching Spark...
Date Tue, 13 Jan 2015 02:52:49 GMT
Github user andrewor14 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/3916#discussion_r22840665
  
    --- Diff: launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java ---
    @@ -0,0 +1,371 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.launcher;
    +
    +import java.io.File;
    +import java.io.IOException;
    +import java.lang.reflect.Method;
    +import java.net.URL;
    +import java.net.URLClassLoader;
    +import java.util.ArrayList;
    +import java.util.Collections;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Properties;
    +import java.util.concurrent.atomic.AtomicLong;
    +
    +/**
    + * Launcher for Spark applications.
    + * <p/>
    + * Use this class to start Spark applications programmatically. The class uses a builder
pattern
    + * to allow clients to configure the Spark application and launch it as a child process.
    + * <p/>
    + * There's also support for running the application on a separate thread, although that
is to
    + * be considered experimental and avoided in production environments.
    + * <p/>
    + * Note that launching Spark applications using this class will not automatically load
environment
    + * variables from the "spark-env.sh" or "spark-env.cmd" scripts in the configuration
directory.
    + */
    +public class SparkLauncher extends AbstractLauncher<SparkLauncher> {
    +
    +  private static final AtomicLong THREAD_ID = new AtomicLong();
    +
    +  protected boolean verbose;
    +  protected String appName;
    +  protected String master;
    +  protected String deployMode;
    +  protected String userClass;
    +  protected String userResource;
    +  protected final List<String> sparkArgs;
    +  protected final List<String> userArgs;
    +  protected final List<String> jars;
    +  protected final List<String> files;
    +  protected final List<String> pyFiles;
    +
    +  public SparkLauncher() {
    +    this.sparkArgs = new ArrayList<String>();
    +    this.userArgs = new ArrayList<String>();
    +    this.jars = new ArrayList<String>();
    +    this.files = new ArrayList<String>();
    +    this.pyFiles = new ArrayList<String>();
    +  }
    +
    +  /** Set the application name. */
    +  public SparkLauncher setAppName(String appName) {
    +    checkNotNull(appName, "appName");
    +    this.appName = appName;
    +    return this;
    +  }
    +
    +  /** Set the Spark master for the application. */
    +  public SparkLauncher setMaster(String master) {
    +    checkNotNull(master, "master");
    +    this.master = master;
    +    return this;
    +  }
    +
    +  /** Set the deploy mode for the application. */
    +  public SparkLauncher setDeployMode(String mode) {
    +    checkNotNull(mode, "mode");
    +    this.deployMode = mode;
    +    return this;
    +  }
    +
    +  /**
    +   * Set the main application resource. This should be the location of a jar file for
Scala/Java
    +   * applications, or a python script for PySpark applications.
    +   */
    +  public SparkLauncher setAppResource(String path) {
    +    checkNotNull(path, "path");
    +    this.userResource = path;
    +    return this;
    +  }
    +
    +  /** Sets the application class name for Java/Scala applications. */
    +  public SparkLauncher setClass(String userClass) {
    +    checkNotNull(userClass, "userClass");
    +    this.userClass = userClass;
    +    return this;
    +  }
    +
    +  /** Adds command line arguments for the application. */
    +  public SparkLauncher addArgs(String... args) {
    +    for (String arg : args) {
    +      checkNotNull(arg, "arg");
    +      userArgs.add(arg);
    +    }
    +    return this;
    +  }
    +
    +  /** Adds a jar file to be submitted with the application. */
    +  public SparkLauncher addJar(String path) {
    +    checkNotNull(path, "path");
    +    jars.add(path);
    +    return this;
    +  }
    +
    +  /** Adds a file to be submitted with the application. */
    +  public SparkLauncher addFile(String path) {
    +    checkNotNull(path, "path");
    +    files.add(path);
    +    return this;
    +  }
    +
    +  /** Adds a python file / zip / egg to be submitted with the application. */
    +  public SparkLauncher addPyFile(String path) {
    +    checkNotNull(path, "path");
    +    pyFiles.add(path);
    +    return this;
    +  }
    +
    +  /** Enables verbose reporting for SparkSubmit. */
    +  public SparkLauncher setVerbose(boolean verbose) {
    +    this.verbose = verbose;
    +    return this;
    +  }
    +
    +  /**
    +   * Starts a new thread that will run the Spark application.
    +   * <p/>
    +   * The application will run on a separate thread and use a separate, isolated class
loader.
    +   * No classes or resources from the current thread's class loader will be visible to
the app.
    +   * <p/>
    +   * This mode does not support certain configuration parameters, like configuring the
amount of
    +   * driver memory or custom driver command line options. If such configuration is detected,
an
    +   * exception will be thrown.
    +   * <p/>
    +   * This is extremely experimental and should not be used in production environments.
    +   * <p/>
    +   * NOTE: SparkSubmit uses system properties to propagate some configuration value to
the app.
    +   * If multiple apps are run concurrently, they may affect each other's configurations.
    +   * <p/>
    +   * NOTE: for users running JDK versions older than 8, this option can add a lot of
overhead
    +   * to the VM's perm gen.
    +   *
    +   * @param handler Optional handler for handling exceptions in the app thread.
    +   * @param daemon Whether to start a daemon thread.
    +   * @return The thread that will run the application using SparkSubmit. The
thread will
    +   *         already be started.
    +   */
    +  public Thread start(Thread.UncaughtExceptionHandler handler, boolean daemon) throws
IOException {
    --- End diff --
    
    Hm, can you motivate the use case for this? Is there a scenario in which a user would prefer
to launch it in a thread instead? Also, if we do decide to support this we should probably
name this something more specific, since it's confusing to have a `start` and a `launch` and
have them do different things.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message