flink-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject [2/3] flink git commit: [builds] Print Java process stack traces of stalled builds
Date Thu, 19 Feb 2015 15:22:19 GMT
[builds] Print Java process stack traces of stalled builds


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/341c859c
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/341c859c
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/341c859c

Branch: refs/heads/master
Commit: 341c859c1051853ba12b3f33135a3d6dc42215a4
Parents: a673e7b
Author: Ufuk Celebi <uce@apache.org>
Authored: Thu Feb 12 12:19:40 2015 +0100
Committer: Ufuk Celebi <uce@apache.org>
Committed: Thu Feb 19 12:08:58 2015 +0100

----------------------------------------------------------------------
 .travis.yml                  |   6 +-
 pom.xml                      |   2 +
 tools/travis_mvn_watchdog.sh | 136 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/341c859c/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 27e2770..3c5dc1b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,7 +54,11 @@ before_script:
 
 # we have to manually set the forkCount because maven thinks that the travis
 # machine has 32 cores
-script: "mvn -Dflink.forkCount=2 -B $PROFILE clean install verify"
+#script: "mvn -Dflink.forkCount=2 -B $PROFILE clean install verify"
+
+# We run mvn and monitor its output. If there is no output for the specified number of seconds, we
+# print the stack traces of all running Java processes.
+script: "./tools/travis_mvn_watchdog.sh 300"
 
 # deploy if the first job is successful; should be replaced by an after_all_success if travis finally supports it
 after_success: 

http://git-wip-us.apache.org/repos/asf/flink/blob/341c859c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 4a43ae0..81e896e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1050,6 +1050,8 @@ under the License.
 						<exclude>docs/_site/**</exclude>
 						<exclude>**/scalastyle-output.xml</exclude>
 						<exclude>build-target/**</exclude>
+						<!-- Tools: watchdog -->
+						<exclude>tools/watchdog*</exclude>
 					</excludes>
 				</configuration>
 			</plugin>

http://git-wip-us.apache.org/repos/asf/flink/blob/341c859c/tools/travis_mvn_watchdog.sh
----------------------------------------------------------------------
diff --git a/tools/travis_mvn_watchdog.sh b/tools/travis_mvn_watchdog.sh
new file mode 100755
index 0000000..e16000c
--- /dev/null
+++ b/tools/travis_mvn_watchdog.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+HERE="`dirname \"$0\"`"				# relative
+HERE="`( cd \"$HERE\" && pwd )`" 	# absolutized and normalized
+if [ -z "$HERE" ] ; then
+	# error; for some reason, the path is not accessible
+	# to the script (e.g. permissions re-evaled after suid)
+	exit 1  # fail
+fi
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+
+# Number of seconds w/o output before printing a stack trace and killing $MVN
+MAX_NO_OUTPUT=${1:-300}
+
+# Number of seconds to sleep before checking the output again
+SLEEP_TIME=20
+
+# Maven command to run
+MVN="mvn -Dflink.forkCount=2 -B $PROFILE clean install verify"
+
+MVN_PID="${HERE}/watchdog.mvn.pid"
+MVN_EXIT="${HERE}/watchdog.mvn.exit"
+MVN_OUT="${HERE}/watchdog.mvn.out"
+TRACE_OUT="${HERE}/watchdog.trace.out"
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+print_stacktraces () {
+	echo "=============================================================================="
+	echo "The following Java processes are running (JPS)"
+	echo "=============================================================================="
+
+	jps
+
+	local pids=( $(jps | awk '{print $1}') )
+
+	for pid in "${pids[@]}"; do
+		echo "=============================================================================="
+		echo "Printing stack trace of Java process ${pid}"
+		echo "=============================================================================="
+
+		jstack $pid
+	done
+}
+
+mod_time () {
+	if [[ `uname` == 'Darwin' ]]; then
+		eval $(stat -s $MVN_OUT)
+		echo $st_mtime
+	else
+		echo `stat -c "%Y" $MVN_OUT`
+	fi
+}
+
+the_time() {
+	echo `date +%s`
+}
+
+watchdog () {
+	touch $MVN_OUT
+
+	while true; do
+		sleep $SLEEP_TIME
+
+		time_diff=$((`the_time` - `mod_time`))
+
+		if [ $time_diff -ge $MAX_NO_OUTPUT ]; then
+			echo "=============================================================================="
+			echo "Maven produced no output for ${MAX_NO_OUTPUT} seconds."
+			echo "=============================================================================="
+
+			print_stacktraces | tee $TRACE_OUT
+
+			kill $(<$MVN_PID)
+
+			exit 1
+		fi
+	done
+}
+
+# =============================================================================
+# WATCHDOG
+# =============================================================================
+
+# Start watching $MVN_OUT
+watchdog &
+
+WD_PID=$!
+
+echo "STARTED watchdog (${WD_PID})."
+
+# Make sure to be in project root
+cd $HERE/../
+
+echo "RUNNING ${MVN} command."
+
+# Run $MVN and pipe output to $MVN_OUT for the watchdog. The PID is written to $MVN_PID to
+# allow the watchdog to kill $MVN if it is not producing any output anymore. $MVN_EXIT contains
+# the exit code. This is important for Travis' build life-cycle (success/failure).
+( $MVN & PID=$! ; echo $PID >&3 ; wait $PID ; echo $? >&4 ) 3>$MVN_PID 4>$MVN_EXIT | tee $MVN_OUT
+
+echo "Trying to KILL watchdog (${WD_PID})."
+
+# Make sure to kill the watchdog in any case after $MVN has completed
+( kill $WD_PID 2>&1 ) > /dev/null
+
+EXIT_CODE=$(<$MVN_EXIT)
+
+echo "MVN exited with EXIT CODE: ${EXIT_CODE}."
+
+rm $MVN_PID
+rm $MVN_EXIT
+
+exit $EXIT_CODE


Mime
View raw message