flink-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From se...@apache.org
Subject [6/7] flink git commit: [FLINK-5340] [metrics] Add an uptime and downtime metric to the Execution Graph.
Date Wed, 29 Mar 2017 22:51:13 GMT
[FLINK-5340] [metrics] Add an uptime and downtime metric to the Execution Graph.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/719d0cf1
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/719d0cf1
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/719d0cf1

Branch: refs/heads/master
Commit: 719d0cf19664556e62f808469fb641127c3f4410
Parents: 231bec8
Author: Stephan Ewen <sewen@apache.org>
Authored: Wed Mar 29 18:24:13 2017 +0200
Committer: Stephan Ewen <sewen@apache.org>
Committed: Wed Mar 29 21:52:47 2017 +0200

----------------------------------------------------------------------
 .../executiongraph/ExecutionGraphBuilder.java   |  4 +
 .../executiongraph/metrics/DownTimeGauge.java   | 79 ++++++++++++++++++++
 .../executiongraph/metrics/UpTimeGauge.java     | 67 +++++++++++++++++
 3 files changed, 150 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/719d0cf1/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
index f1da8bd..a6455f5 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
@@ -33,7 +33,9 @@ import org.apache.flink.runtime.checkpoint.CompletedCheckpointStore;
 import org.apache.flink.runtime.checkpoint.CheckpointStatsTracker;
 import org.apache.flink.runtime.client.JobExecutionException;
 import org.apache.flink.runtime.client.JobSubmissionException;
+import org.apache.flink.runtime.executiongraph.metrics.DownTimeGauge;
 import org.apache.flink.runtime.executiongraph.metrics.RestartTimeGauge;
+import org.apache.flink.runtime.executiongraph.metrics.UpTimeGauge;
 import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
 import org.apache.flink.runtime.instance.SlotProvider;
 import org.apache.flink.runtime.jobgraph.JobGraph;
@@ -247,6 +249,8 @@ public class ExecutionGraphBuilder {
 		// create all the metrics for the Execution Graph
 
 		metrics.gauge(RestartTimeGauge.METRIC_NAME, new RestartTimeGauge(executionGraph));
+		metrics.gauge(DownTimeGauge.METRIC_NAME, new DownTimeGauge(executionGraph));
+		metrics.gauge(UpTimeGauge.METRIC_NAME, new UpTimeGauge(executionGraph));
 
 		return executionGraph;
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/719d0cf1/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
new file mode 100644
index 0000000..5f24587
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.executiongraph.metrics;
+
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.runtime.executiongraph.ExecutionGraph;
+import org.apache.flink.runtime.jobgraph.JobStatus;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A gauge that returns (in milliseconds) how long a job has not been running,
+ * in case it is in a failing/recovering situation. Running jobs naturally return
+ * a value of zero.
+ * 
+ * <p>For jobs that have never run (new, not-yet-scheduled jobs), this gauge returns
+ * {@value NOT_YET_RUNNING}, and for jobs that are not running any more, it returns
+ * {@value NO_LONGER_RUNNING}.
+ */
+public class DownTimeGauge implements Gauge<Long> {
+
+	public static final String METRIC_NAME = "downtime";
+
+	private static final long NOT_YET_RUNNING = 0L;
+
+	private static final long NO_LONGER_RUNNING = -1L;
+
+	// ------------------------------------------------------------------------
+
+	private final ExecutionGraph eg;
+
+	public DownTimeGauge(ExecutionGraph executionGraph) {
+		this.eg = checkNotNull(executionGraph);
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public Long getValue() {
+		final JobStatus status = eg.getState();
+
+		if (status == JobStatus.RUNNING) {
+			// running right now - no downtime
+			return 0L;
+		}
+		else if (status.isTerminalState()) {
+			// not running any more -> finished or not on leader
+			return NO_LONGER_RUNNING;
+		}
+		else {
+			final long runningTimestamp = eg.getStatusTimestamp(JobStatus.RUNNING);
+			if (runningTimestamp > 0) {
+				// job was running at some point and is not running now
+				// we use 'Math.max' here to avoid negative durations when clocks change
+				return Math.max(System.currentTimeMillis() - runningTimestamp, 0);
+			}
+			else {
+				// job was never scheduled so far
+				return NOT_YET_RUNNING;
+			}
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/719d0cf1/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
new file mode 100644
index 0000000..d3f6224
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.executiongraph.metrics;
+
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.runtime.executiongraph.ExecutionGraph;
+import org.apache.flink.runtime.jobgraph.JobStatus;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A gauge that returns (in milliseconds) how long a job has been running.
+ * 
+ * <p>For jobs that are not running any more, it returns {@value NO_LONGER_RUNNING}.
+ */
+public class UpTimeGauge implements Gauge<Long> {
+
+	public static final String METRIC_NAME = "uptime";
+
+	private static final long NO_LONGER_RUNNING = -1L;
+
+	// ------------------------------------------------------------------------
+
+	private final ExecutionGraph eg;
+
+	public UpTimeGauge(ExecutionGraph executionGraph) {
+		this.eg = checkNotNull(executionGraph);
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public Long getValue() {
+		final JobStatus status = eg.getState();
+
+		if (status == JobStatus.RUNNING) {
+			// running right now - report the uptime
+			final long runningTimestamp = eg.getStatusTimestamp(JobStatus.RUNNING);
+			// we use 'Math.max' here to avoid negative durations when clocks change
+			return Math.max(System.currentTimeMillis() - runningTimestamp, 0);
+		}
+		else if (status.isTerminalState()) {
+			// not running any more -> finished or not on leader
+			return NO_LONGER_RUNNING;
+		}
+		else {
+			// not yet running or not up at the moment
+			return 0L;
+		}
+	}
+}
\ No newline at end of file


Mime
View raw message