ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aonis...@apache.org
Subject [1/2] ambari git commit: AMBARI-16914. Ambari uses too small a window for region server shutdown (aonishuk)
Date Mon, 13 Jun 2016 15:26:59 GMT
Repository: ambari
Updated Branches:
  refs/heads/branch-2.4 afdbe4918 -> f276ee9a7
  refs/heads/trunk bd71e62d3 -> b220d26f7


AMBARI-16914. Ambari uses too small a window for region server shutdown (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b220d26f
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b220d26f
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b220d26f

Branch: refs/heads/trunk
Commit: b220d26f7c158aa48338018ec281a3dab34929d5
Parents: bd71e62
Author: Andrew Onishuk <aonishuk@hortonworks.com>
Authored: Mon Jun 13 18:26:51 2016 +0300
Committer: Andrew Onishuk <aonishuk@hortonworks.com>
Committed: Mon Jun 13 18:26:51 2016 +0300

----------------------------------------------------------------------
 .../0.1.0/configuration/ams-hbase-env.xml            | 15 +++++++++++++++
 .../0.1.0/package/scripts/hbase_service.py           |  2 +-
 .../0.1.0/package/scripts/params_linux.py            |  3 +++
 .../HBASE/0.96.0.2.0/configuration/hbase-env.xml     | 15 +++++++++++++++
 .../0.96.0.2.0/package/scripts/hbase_service.py      |  2 +-
 .../HBASE/0.96.0.2.0/package/scripts/params_linux.py |  1 +
 .../0.96.0.2.0/package/scripts/phoenix_service.py    |  2 --
 .../stacks/2.0.6/HBASE/test_phoenix_queryserver.py   |  4 ----
 .../test/python/stacks/2.0.6/configs/default.json    |  6 ++++--
 .../test/python/stacks/2.0.6/configs/secured.json    |  3 ++-
 10 files changed, 42 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml
b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml
index b40923a..4c866d9 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml
@@ -157,6 +157,21 @@
     </description>
     <on-ambari-upgrade add="true"/>
   </property>
+  <property>
+    <name>hbase_regionserver_shutdown_timeout</name>
+    <value>30</value>
+    <display-name>HBase RegionServer shutdown timeout</display-name>
+    <description>
+After this number of seconds waiting for graceful stop of the HBase RegionServer, it will be forced to exit with SIGKILL.
+The timeout is introduced because of a known bug where the HBase RegionServer occasionally hangs forever on stop if NameNode (NN) safemode is on.
+    </description>
+    <value-attributes>
+      <type>directory</type>
+      <overridable>false</overridable>
+      <editable-only-at-install>true</editable-only-at-install>
+    </value-attributes>
+    <on-ambari-upgrade add="true"/>
+  </property>
   <!-- hbase-env.sh -->
   <property>
     <name>content</name>

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py
b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py
index ba4725b..42f23bf 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py
@@ -45,7 +45,7 @@ def hbase_service(
       Execute ( daemon_cmd,
         user = params.hbase_user,
         # BUGFIX: hbase regionserver sometimes hangs when nn is in safemode
-        timeout = 30,
+        timeout = params.hbase_regionserver_shutdown_timeout,
         on_timeout = format("{no_op_test} && {sudo} -H -E kill -9 `{sudo} cat {pid_file}`")
       )
       

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py
b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py
index 13ec279..52e9fe5 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py
@@ -24,6 +24,7 @@ from resource_management.libraries.functions.default import default
 from resource_management.libraries.functions.format import format
 from ambari_commons import OSCheck
 from ambari_commons.constants import AMBARI_SUDO_BINARY
+from resource_management.libraries.functions.expect import expect
 
 config = Script.get_config()
 
@@ -53,3 +54,5 @@ sudo = AMBARI_SUDO_BINARY
 
 dfs_type = default("/commandParams/dfs_type", "")
 
+hbase_regionserver_shutdown_timeout = expect('/configurations/ams-hbase-env/hbase_regionserver_shutdown_timeout',
int)
+

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml
b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml
index eaee3cf..93ca6ba 100644
--- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml
+++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml
@@ -139,6 +139,21 @@ and the -Xmn ratio (hbase_regionserver_xmn_ratio) exceeds this value.
     <description>HBase keytab path</description>
     <on-ambari-upgrade add="true"/>
   </property>
+  <property>
+    <name>hbase_regionserver_shutdown_timeout</name>
+    <value>30</value>
+    <display-name>HBase RegionServer shutdown timeout</display-name>
+    <description>
+After this number of seconds waiting for graceful stop of the HBase RegionServer, it will be forced to exit with SIGKILL.
+The timeout is introduced because of a known bug where the HBase RegionServer occasionally hangs forever on stop if NameNode (NN) safemode is on.
+    </description>
+    <value-attributes>
+      <type>directory</type>
+      <overridable>false</overridable>
+      <editable-only-at-install>true</editable-only-at-install>
+    </value-attributes>
+    <on-ambari-upgrade add="true"/>
+  </property>
   <!-- hbase-env.sh -->
   <property>
     <name>content</name>

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py
b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py
index e9e8803..1dbd560 100644
--- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py
+++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py
@@ -54,7 +54,7 @@ def hbase_service(
           user = params.hbase_user,
           only_if = no_op_test,
           # BUGFIX: hbase regionserver sometimes hangs when nn is in safemode
-          timeout = 30,
+          timeout = params.hbase_regionserver_shutdown_timeout,
           on_timeout = format("! ( {no_op_test} ) || {sudo} -H -E kill -9 `{pid_expression}`"),
         )
       except:

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py
b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py
index 05bad1c..76cefe7 100644
--- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py
+++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py
@@ -116,6 +116,7 @@ regionserver_xmn_max = config['configurations']['hbase-env']['hbase_regionserver
 regionserver_xmn_percent = expect("/configurations/hbase-env/hbase_regionserver_xmn_ratio",
float)
 regionserver_xmn_size = calc_xmn_from_xms(regionserver_heapsize, regionserver_xmn_percent,
regionserver_xmn_max)
 
+hbase_regionserver_shutdown_timeout = expect('/configurations/hbase-env/hbase_regionserver_shutdown_timeout',
int)
 
 phoenix_hosts = default('/clusterHostInfo/phoenix_query_server_hosts', [])
 phoenix_enabled = default('/configurations/hbase-env/phoenix_sql_enabled', False)

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py
b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py
index 0a42cda..42d9cd1 100644
--- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py
+++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py
@@ -43,8 +43,6 @@ def phoenix_service(action = 'start'): # 'start', 'stop', 'status'
   
       elif action == 'stop':
         Execute(daemon_cmd,
-                timeout = 30,
-                on_timeout = format("! ( {no_op_test} ) || {sudo} -H -E kill -9 `cat {pid_file}`"),
                 user=format("{hbase_user}"),
                 environment=env
         )

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py
b/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py
index ac8b153..e4f5b2f 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py
@@ -80,8 +80,6 @@ class TestPhoenixQueryServer(RMFTestCase):
 
     self.assertResourceCalled('Execute',
       '/usr/hdp/current/phoenix-server/bin/queryserver.py stop',
-      on_timeout = '! ( ls /var/run/hbase/phoenix-hbase-server.pid >/dev/null 2>&1
&& ps -p `cat /var/run/hbase/phoenix-hbase-server.pid` >/dev/null 2>&1 )
|| ambari-sudo.sh -H -E kill -9 `cat /var/run/hbase/phoenix-hbase-server.pid`',
-      timeout = 30,
       environment = {'JAVA_HOME':'/usr/jdk64/jdk1.8.0_40',
       'HBASE_CONF_DIR':'/usr/hdp/current/hbase-regionserver/conf'},
       user = 'hbase'
@@ -140,8 +138,6 @@ class TestPhoenixQueryServer(RMFTestCase):
 
     self.assertResourceCalled('Execute',
       '/usr/hdp/current/phoenix-server/bin/queryserver.py stop',
-      on_timeout = '! ( ls /var/run/hbase/phoenix-hbase-server.pid >/dev/null 2>&1
&& ps -p `cat /var/run/hbase/phoenix-hbase-server.pid` >/dev/null 2>&1 )
|| ambari-sudo.sh -H -E kill -9 `cat /var/run/hbase/phoenix-hbase-server.pid`',
-      timeout = 30,
       environment = {'JAVA_HOME':'/usr/jdk64/jdk1.8.0_40',
       'HBASE_CONF_DIR':'/usr/hdp/current/hbase-regionserver/conf'},
       user = 'hbase'

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
index 04aa828..d4310f1 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
+++ b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
@@ -610,7 +610,8 @@
             "hbase_regionserver_xmn_max": "512",
             "hbase_regionserver_xmn_ratio": "0.2",
             "hbase_log_dir": "/var/log/hbase",
-            "hbase_java_io_tmpdir" : "/tmp"
+            "hbase_java_io_tmpdir" : "/tmp",
+            "hbase_regionserver_shutdown_timeout": "30"
         },
         "ganglia-env": {
             "gmond_user": "nobody",
@@ -812,7 +813,8 @@
             "hbase_regionserver_heapsize": "512m",
             "hbase_log_dir": "/var/log/ambari-metrics-collector",
             "hbase_master_xmn_size": "256m",
-            "content": "\n"
+            "content": "\n",
+            "hbase_regionserver_shutdown_timeout": "30"
         },
         "ams-log4j": {
             "content": "\n"

http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json b/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json
index 02f982e..fac0649 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json
+++ b/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json
@@ -658,7 +658,8 @@
             "hbase_regionserver_xmn_ratio": "0.2",
             "hbase_log_dir": "/var/log/hbase",
             "hbase_user_keytab": "/etc/security/keytabs/hbase.headless.keytab",
-            "hbase_java_io_tmpdir" : "/tmp"
+            "hbase_java_io_tmpdir" : "/tmp",
+            "hbase_regionserver_shutdown_timeout": "30"
         },
         "flume-env": {
             "content": "export JAVA_HOME={{java64_home}}",


Mime
View raw message