impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tarmstr...@apache.org
Subject [1/3] incubator-impala git commit: IMPALA-3684, IMPALA-3693: Disable core files for breakpad tests
Date Fri, 10 Jun 2016 00:31:04 GMT
Repository: incubator-impala
Updated Branches:
  refs/heads/master 4e7172f6f -> ca62ce65e


IMPALA-3684, IMPALA-3693: Disable core files for breakpad tests

The breakpad tests were writing core files when triggering minidump
writes. This was not needed and interfered with test execution and
artifact collection. Most notably, processes would take a long time
to terminate while writing core files (IMPALA-3684). The core files
would also be wrongly collected by Jenkins (IMPALA-3693).

This change adds code to stop test clusters reliably, so that
test_breakpad no longer depends on calling setup-impala-cluster.py via
os.system. It also disables core dumps for the duration of the test and
re-enables them afterwards.

Change-Id: If592339632aa662b59be09d911229566d5772321
Reviewed-on: http://gerrit.cloudera.org:8080/3339
Reviewed-by: Michael Brown <mikeb@cloudera.com>
Reviewed-by: Silvius Rus <srus@cloudera.com>
Tested-by: Lars Volker <lv@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ca62ce65
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ca62ce65
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ca62ce65

Branch: refs/heads/master
Commit: ca62ce65e9c652eecc9cb883842bd43594627be8
Parents: abd6ad3
Author: Lars Volker <lv@cloudera.com>
Authored: Tue Jun 7 18:24:14 2016 +0200
Committer: Tim Armstrong <tarmstrong@cloudera.com>
Committed: Thu Jun 9 17:31:00 2016 -0700

----------------------------------------------------------------------
 tests/custom_cluster/test_breakpad.py | 47 +++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ca62ce65/tests/custom_cluster/test_breakpad.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index 4abd34b..4b6aa2d 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -15,10 +15,12 @@
 import glob
 import os
 import pytest
+import psutil
 import shutil
 import tempfile
 import time
 
+from resource import setrlimit, RLIMIT_CORE, RLIM_INFINITY
 from signal import SIGSEGV, SIGKILL
 
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
@@ -45,14 +47,21 @@ class TestBreakpad(CustomClusterTestSuite):
   def teardown_method(self, method):
     # Override parent
     # Stop the cluster to prevent future accesses to self.tmp_dir.
-    self._stop_impala_cluster()
+    self.kill_cluster(SIGKILL)
     assert self.tmp_dir
     shutil.rmtree(self.tmp_dir)
 
   @classmethod
-  def teardown_class(cls):
+  def setup_class(cls):
     if cls.exploration_strategy() != 'exhaustive':
-      return
+      pytest.skip('breakpad tests only run in exhaustive')
+    # Disable core dumps for this test
+    setrlimit(RLIMIT_CORE, (0, RLIM_INFINITY))
+
+  @classmethod
+  def teardown_class(cls):
+    # Re-enable core dumps
+    setrlimit(RLIMIT_CORE, (RLIM_INFINITY, RLIM_INFINITY))
     # Start default cluster for subsequent tests (verify_metrics).
     cls._start_impala_cluster([])
 
@@ -71,15 +80,28 @@ class TestBreakpad(CustomClusterTestSuite):
 
   def kill_cluster(self, signal):
     self.cluster.refresh()
-    cluster = self.cluster
-    for impalad in cluster.impalads:
-      impalad.kill(signal)
-    cluster.statestored.kill(signal)
-    cluster.catalogd.kill(signal)
-    # Wait for daemons to finish writing minidumps
-    time.sleep(1)
+    processes = self.cluster.impalads + [self.cluster.catalogd, self.cluster.statestored]
+    processes = filter(None, processes)
+    self.kill_processes(processes, signal)
     self.assert_all_processes_killed()
 
+  def kill_processes(self, processes, signal):
+    for process in processes:
+      process.kill(signal)
+    self.wait_for_all_processes_dead(processes)
+
+  def wait_for_all_processes_dead(self, processes, timeout=300):
+    for process in processes:
+      try:
+        pid = process.get_pid()
+        if not pid:
+          continue
+        psutil_process = psutil.Process(pid)
+        psutil_process.wait(timeout)
+      except psutil.TimeoutExpired:
+        raise RuntimeError("Unable to kill %s (pid %d) after %d seconds." %
+            (psutil_process.name, psutil_process.pid, timeout))
+
   def assert_all_processes_killed(self):
     self.cluster.refresh()
     assert not self.cluster.impalads
@@ -158,11 +180,10 @@ class TestBreakpad(CustomClusterTestSuite):
     """Kill a single impalad with SIGSEGV to make it write a minidump. Kill the rest of
     the cluster. Clean up the single minidump file and return its size.
     """
+    self.cluster.refresh()
     assert len(self.cluster.impalads) > 0
     # Make one impalad write a minidump.
-    self.cluster.impalads[0].kill(SIGSEGV)
-    # Wait for the minidump to be written before killing the rest of the cluster.
-    time.sleep(1)
+    self.kill_processes(self.cluster.impalads[:1], SIGSEGV)
     # Kill the rest of the cluster.
     self.kill_cluster(SIGKILL)
     assert self.count_minidumps('impalad') == 1


Mime
View raw message