Return-Path: X-Original-To: apmail-spark-commits-archive@minotaur.apache.org Delivered-To: apmail-spark-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6508418192 for ; Sat, 2 May 2015 01:37:59 +0000 (UTC) Received: (qmail 41096 invoked by uid 500); 2 May 2015 01:37:59 -0000 Delivered-To: apmail-spark-commits-archive@spark.apache.org Received: (qmail 41060 invoked by uid 500); 2 May 2015 01:37:59 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 41051 invoked by uid 99); 2 May 2015 01:37:59 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 02 May 2015 01:37:59 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 2A5EDE17E2; Sat, 2 May 2015 01:37:59 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: andrewor14@apache.org To: commits@spark.apache.org Message-Id: <438f9d24182b4fd9aa93ed4a9fbc9c21@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-7216] [MESOS] Add driver details page to Mesos cluster UI. Date: Sat, 2 May 2015 01:37:59 +0000 (UTC) Repository: spark Updated Branches: refs/heads/master 099327d53 -> 202219341 [SPARK-7216] [MESOS] Add driver details page to Mesos cluster UI. Add a details page that displays Mesos driver details in the Mesos cluster UI. Author: Timothy Chen Closes #5763 from tnachen/mesos_cluster_page and squashes the following commits: 55f36eb [Timothy Chen] Add driver details page to Mesos cluster UI. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20221934 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20221934 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20221934 Branch: refs/heads/master Commit: 2022193412e832393a29b94609841c3ffe8e3d66 Parents: 099327d Author: Timothy Chen Authored: Fri May 1 18:36:42 2015 -0700 Committer: Andrew Or Committed: Fri May 1 18:36:42 2015 -0700 ---------------------------------------------------------------------- .../spark/deploy/mesos/ui/DriverPage.scala | 180 +++++++++++++++++++ .../deploy/mesos/ui/MesosClusterPage.scala | 9 +- .../spark/deploy/mesos/ui/MesosClusterUI.scala | 1 + .../deploy/rest/mesos/MesosRestServer.scala | 6 +- .../cluster/mesos/MesosClusterScheduler.scala | 33 +++- .../cluster/mesos/MesosSchedulerBackend.scala | 4 +- 6 files changed, 222 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/20221934/core/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala b/core/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala new file mode 100644 index 0000000..be8560d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.mesos.ui + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node + +import org.apache.spark.deploy.Command +import org.apache.spark.deploy.mesos.MesosDriverDescription +import org.apache.spark.scheduler.cluster.mesos.{MesosClusterSubmissionState, MesosClusterRetryState} +import org.apache.spark.ui.{UIUtils, WebUIPage} + + +private[ui] class DriverPage(parent: MesosClusterUI) extends WebUIPage("driver") { + + override def render(request: HttpServletRequest): Seq[Node] = { + val driverId = request.getParameter("id") + require(driverId != null && driverId.nonEmpty, "Missing id parameter") + + val state = parent.scheduler.getDriverState(driverId) + if (state.isEmpty) { + val content = +
+

Cannot find driver {driverId}

+
+ return UIUtils.basicSparkPage(content, s"Details for Job $driverId") + } + val driverState = state.get + val driverHeaders = Seq("Driver property", "Value") + val schedulerHeaders = Seq("Scheduler property", "Value") + val commandEnvHeaders = Seq("Command environment variable", "Value") + val launchedHeaders = Seq("Launched property", "Value") + val commandHeaders = Seq("Comamnd property", "Value") + val retryHeaders = Seq("Last failed status", "Next retry time", "Retry count") + val driverDescription = Iterable.apply(driverState.description) + val submissionState = Iterable.apply(driverState.submissionState) + val command = Iterable.apply(driverState.description.command) + val schedulerProperties = Iterable.apply(driverState.description.schedulerProperties) + val commandEnv = Iterable.apply(driverState.description.command.environment) + val driverTable = + UIUtils.listingTable(driverHeaders, driverRow, driverDescription) + val commandTable = + UIUtils.listingTable(commandHeaders, commandRow, command) + val commandEnvTable = + UIUtils.listingTable(commandEnvHeaders, propertiesRow, commandEnv) + val schedulerTable = + UIUtils.listingTable(schedulerHeaders, propertiesRow, schedulerProperties) + val launchedTable = + UIUtils.listingTable(launchedHeaders, launchedRow, submissionState) + val retryTable = + UIUtils.listingTable( + retryHeaders, retryRow, Iterable.apply(driverState.description.retryState)) + val content = +

Driver state information for driver id {driverId}

+ Back to Drivers +
+
+

Driver state: {driverState.state}

+

Driver properties

+ {driverTable} +

Driver command

+ {commandTable} +

Driver command environment

+ {commandEnvTable} +

Scheduler properties

+ {schedulerTable} +

Launched state

+ {launchedTable} +

Retry state

+ {retryTable} +
+
; + + UIUtils.basicSparkPage(content, s"Details for Job $driverId") + } + + private def launchedRow(submissionState: Option[MesosClusterSubmissionState]): Seq[Node] = { + submissionState.map { state => + + Mesos Slave ID + {state.slaveId.getValue} + + + Mesos Task ID + {state.taskId.getValue} + + + Launch Time + {state.startDate} + + + Finish Time + {state.finishDate.map(_.toString).getOrElse("")} + + + Last Task Status + {state.mesosTaskStatus.map(_.toString).getOrElse("")} + + }.getOrElse(Seq[Node]()) + } + + private def propertiesRow(properties: collection.Map[String, String]): Seq[Node] = { + properties.map { case (k, v) => + + {k}{v} + + }.toSeq + } + + private def commandRow(command: Command): Seq[Node] = { + + Main class{command.mainClass} + + + Arguments{command.arguments.mkString(" ")} + + + Class path entries{command.classPathEntries.mkString(" ")} + + + Java options{command.javaOpts.mkString((" "))} + + + Library path entries{command.libraryPathEntries.mkString((" "))} + + } + + private def driverRow(driver: MesosDriverDescription): Seq[Node] = { + + Name{driver.name} + + + Id{driver.submissionId} + + + Cores{driver.cores} + + + Memory{driver.mem} + + + Submitted{driver.submissionDate} + + + Supervise{driver.supervise} + + } + + private def retryRow(retryState: Option[MesosClusterRetryState]): Seq[Node] = { + retryState.map { state => + + + {state.lastFailureStatus} + + + {state.nextRetry} + + + {state.retries} + + + }.getOrElse(Seq[Node]()) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/20221934/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala b/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala index 7b2005e..7419fa9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala @@ -56,8 +56,9 @@ private[mesos] class MesosClusterPage(parent: MesosClusterUI) extends WebUIPage( } private def queuedRow(submission: MesosDriverDescription): Seq[Node] = { + val id = submission.submissionId - {submission.submissionId} + {id} {submission.submissionDate} {submission.command.mainClass} cpus: {submission.cores}, mem: {submission.mem} @@ -65,8 +66,9 @@ private[mesos] class MesosClusterPage(parent: MesosClusterUI) extends WebUIPage( } private def driverRow(state: MesosClusterSubmissionState): Seq[Node] = { + val id = state.driverDescription.submissionId - {state.driverDescription.submissionId} + {id} {state.driverDescription.submissionDate} {state.driverDescription.command.mainClass} cpus: {state.driverDescription.cores}, mem: {state.driverDescription.mem} @@ -77,8 +79,9 @@ private[mesos] class MesosClusterPage(parent: MesosClusterUI) extends WebUIPage( } private def retryRow(submission: MesosDriverDescription): Seq[Node] = { + val id = submission.submissionId - {submission.submissionId} + {id} {submission.submissionDate} {submission.command.mainClass} {submission.retryState.get.lastFailureStatus} http://git-wip-us.apache.org/repos/asf/spark/blob/20221934/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala b/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala index 4865d46..3f69354 100644 --- a/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala @@ -39,6 +39,7 @@ private[spark] class MesosClusterUI( override def initialize() { attachPage(new MesosClusterPage(this)) + attachPage(new DriverPage(this)) attachHandler(createStaticHandler(MesosClusterUI.STATIC_RESOURCE_DIR, 
"/static")) } } http://git-wip-us.apache.org/repos/asf/spark/blob/20221934/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index fd17a98..8198296 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -53,7 +53,7 @@ private[spark] class MesosRestServer( new MesosStatusRequestServlet(scheduler, masterConf) } -private[deploy] class MesosSubmitRequestServlet( +private[mesos] class MesosSubmitRequestServlet( scheduler: MesosClusterScheduler, conf: SparkConf) extends SubmitRequestServlet { @@ -139,7 +139,7 @@ private[deploy] class MesosSubmitRequestServlet( } } -private[deploy] class MesosKillRequestServlet(scheduler: MesosClusterScheduler, conf: SparkConf) +private[mesos] class MesosKillRequestServlet(scheduler: MesosClusterScheduler, conf: SparkConf) extends KillRequestServlet { protected override def handleKill(submissionId: String): KillSubmissionResponse = { val k = scheduler.killDriver(submissionId) @@ -148,7 +148,7 @@ private[deploy] class MesosKillRequestServlet(scheduler: MesosClusterScheduler, } } -private[deploy] class MesosStatusRequestServlet(scheduler: MesosClusterScheduler, conf: SparkConf) +private[mesos] class MesosStatusRequestServlet(scheduler: MesosClusterScheduler, conf: SparkConf) extends StatusRequestServlet { protected override def handleStatus(submissionId: String): SubmissionStatusResponse = { val d = scheduler.getDriverStatus(submissionId) http://git-wip-us.apache.org/repos/asf/spark/blob/20221934/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala ---------------------------------------------------------------------- diff 
--git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 0396e62..06f0e28 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -50,12 +50,13 @@ private[spark] class MesosClusterSubmissionState( val taskId: TaskID, val slaveId: SlaveID, var mesosTaskStatus: Option[TaskStatus], - var startDate: Date) + var startDate: Date, + var finishDate: Option[Date]) extends Serializable { def copy(): MesosClusterSubmissionState = { new MesosClusterSubmissionState( - driverDescription, taskId, slaveId, mesosTaskStatus, startDate) + driverDescription, taskId, slaveId, mesosTaskStatus, startDate, finishDate) } } @@ -96,6 +97,14 @@ private[spark] class MesosClusterSchedulerState( val pendingRetryDrivers: Iterable[MesosDriverDescription]) /** + * The full state of a Mesos driver, used to display driver information in the UI. + */ +private[spark] class MesosDriverState( + val state: String, + val description: MesosDriverDescription, + val submissionState: Option[MesosClusterSubmissionState] = None) + +/** * A Mesos scheduler that is responsible for launching submitted Spark drivers in cluster mode * as Mesos tasks in a Mesos cluster. * All drivers are launched asynchronously by the framework, which will eventually be launched @@ -233,6 +242,22 @@ private[spark] class MesosClusterScheduler( s } + /** + * Gets the driver state to be displayed on the Web UI. 
+ */ + def getDriverState(submissionId: String): Option[MesosDriverState] = { + stateLock.synchronized { + queuedDrivers.find(_.submissionId.equals(submissionId)) + .map(d => new MesosDriverState("QUEUED", d)) + .orElse(launchedDrivers.get(submissionId) + .map(d => new MesosDriverState("RUNNING", d.driverDescription, Some(d)))) + .orElse(finishedDrivers.find(_.driverDescription.submissionId.equals(submissionId)) + .map(d => new MesosDriverState("FINISHED", d.driverDescription, Some(d)))) + .orElse(pendingRetryDrivers.find(_.submissionId.equals(submissionId)) + .map(d => new MesosDriverState("RETRYING", d))) + } + } + private def isQueueFull(): Boolean = launchedDrivers.size >= queuedCapacity /** @@ -439,7 +464,7 @@ private[spark] class MesosClusterScheduler( logTrace(s"Using offer ${offer.offer.getId.getValue} to launch driver " + submission.submissionId) val newState = new MesosClusterSubmissionState(submission, taskId, offer.offer.getSlaveId, - None, new Date()) + None, new Date(), None) launchedDrivers(submission.submissionId) = newState launchedDriversState.persist(submission.submissionId, newState) afterLaunchCallback(submission.submissionId) @@ -534,6 +559,7 @@ private[spark] class MesosClusterScheduler( // Check if the driver is supervise enabled and can be relaunched. 
if (state.driverDescription.supervise && shouldRelaunch(status.getState)) { removeFromLaunchedDrivers(taskId) + state.finishDate = Some(new Date()) val retryState: Option[MesosClusterRetryState] = state.driverDescription.retryState val (retries, waitTimeSec) = retryState .map { rs => (rs.retries + 1, Math.min(maxRetryWaitTime, rs.waitTime * 2)) } @@ -546,6 +572,7 @@ private[spark] class MesosClusterScheduler( pendingRetryDriversState.persist(taskId, newDriverDescription) } else if (TaskState.isFinished(TaskState.fromMesos(status.getState))) { removeFromLaunchedDrivers(taskId) + state.finishDate = Some(new Date()) if (finishedDrivers.size >= retainedDrivers) { val toRemove = math.max(retainedDrivers / 10, 1) finishedDrivers.trimStart(toRemove) http://git-wip-us.apache.org/repos/asf/spark/blob/20221934/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala index 8346a24..86a7d0f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala @@ -23,7 +23,7 @@ import java.util.{ArrayList => JArrayList, Collections, List => JList} import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, HashSet} -import org.apache.mesos.Protos.{ExecutorInfo => MesosExecutorInfo, TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} +import org.apache.mesos.Protos.{ExecutorInfo => MesosExecutorInfo, TaskInfo => MesosTaskInfo, _} import org.apache.mesos.protobuf.ByteString import org.apache.mesos.{Scheduler => MScheduler, _} import org.apache.spark.executor.MesosExecutorBackend @@ -56,7 +56,7 @@ private[spark] class 
MesosSchedulerBackend( // The listener bus to publish executor added/removed events. val listenerBus = sc.listenerBus - + private[mesos] val mesosExecutorCores = sc.conf.getDouble("spark.mesos.mesosExecutor.cores", 1) @volatile var appId: String = _ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org