ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aonis...@apache.org
Subject [1/2] ambari git commit: AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start (aonishuk)
Date Thu, 12 Nov 2015 12:56:22 GMT
Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 408f7b7f3 -> 4a989be67
  refs/heads/trunk 0c25a4dec -> 696e58bd8


AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/696e58bd
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/696e58bd
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/696e58bd

Branch: refs/heads/trunk
Commit: 696e58bd869574f6c9b013360d2f32d5a9b883a4
Parents: 0c25a4d
Author: Andrew Onishuk <aonishuk@hortonworks.com>
Authored: Thu Nov 12 14:56:09 2015 +0200
Committer: Andrew Onishuk <aonishuk@hortonworks.com>
Committed: Thu Nov 12 14:56:09 2015 +0200

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  | 43 ++++++++------------
 .../python/stacks/2.0.6/HDFS/test_namenode.py   | 17 ++++----
 2 files changed, 25 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/696e58bd/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index f944b8d..d6a0a41 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -115,10 +115,11 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
       Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
               user = params.hdfs_user)
 
-    is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")
     if params.dfs_ha_enabled:
+      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs hdfs://{namenode_rpc} -safemode get | grep 'Safe mode is OFF'")
       is_active_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
     else:
+      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")
       is_active_namenode_cmd = True
     
     # During NonRolling Upgrade, both NameNodes are initially down,
@@ -129,30 +130,21 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
     # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
     # no-HA                 | ON -> OFF                | Yes                      |
     # HA and active         | ON -> OFF                | Yes                      |
-    # HA and standby        | no change                | no check                 |
+    # HA and standby        | ON -> OFF                | Yes                      |
     # RU with HA on active  | ON -> OFF                | Yes                      |
     # RU with HA on standby | ON -> OFF                | Yes                      |
     # EU with HA on active  | no change                | no check                 |
     # EU with HA on standby | no change                | no check                 |
     # EU non-HA             | no change                | no check                 |
 
-    check_for_safemode_off = False
     msg = ""
     if params.dfs_ha_enabled:
       if upgrade_type is not None:
-        check_for_safemode_off = True
         msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
       else:
-        # During normal operations, the NameNode is expected to be up.
-        code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code will be 0
-        if code == 0: # active
-          check_for_safemode_off = True
-          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
-        else:
-          msg = "Will remain in the current safemode state."
+        msg = "Must wait to leave safemode since High Availability is enabled."
     else:
       msg = "Must wait to leave safemode since High Availability is not enabled."
-      check_for_safemode_off = True
 
     Logger.info(msg)
 
@@ -161,20 +153,19 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
     if upgrade_type == "nonrolling":
       stay_in_safe_mode = True
 
-    if check_for_safemode_off:
-      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
-      if not stay_in_safe_mode:
-        Logger.info("Wait to leafe safemode since must transition from ON to OFF.")
-        try:
-          # Wait up to 30 mins
-          Execute(is_namenode_safe_mode_off,
-                  tries=180,
-                  try_sleep=10,
-                  user=params.hdfs_user,
-                  logoutput=True
-          )
-        except Fail:
-          Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
+    Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
+    if not stay_in_safe_mode:
+      Logger.info("Wait to leafe safemode since must transition from ON to OFF.")
+      try:
+        # Wait up to 30 mins
+        Execute(is_namenode_safe_mode_off,
+                tries=180,
+                try_sleep=10,
+                user=params.hdfs_user,
+                logoutput=True
+        )
+      except Fail:
+        Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
 
     # Always run this on non-HA, or active NameNode during HA.
     create_hdfs_directories(is_active_namenode_cmd)

http://git-wip-us.apache.org/repos/asf/ambari/blob/696e58bd/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index 353d91f..1ec8c8b 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -425,7 +425,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -519,7 +519,7 @@ class TestNamenode(RMFTestCase):
     self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/hdfs.headless.keytab hdfs',
         user = 'hdfs',
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -622,7 +622,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -724,7 +724,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -780,10 +780,10 @@ class TestNamenode(RMFTestCase):
     )
     self.assertNoMoreResources()
     self.assertTrue(call_mocks.called)
-    self.assertEqual(2, call_mocks.call_count)
+    self.assertEqual(1, call_mocks.call_count)
     calls = [
       call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs'),
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True)]
+    ]
     call_mocks.assert_has_calls(calls, any_order=False)
 
   # tests namenode start command when NameNode HA is enabled, and
@@ -834,7 +834,7 @@ class TestNamenode(RMFTestCase):
                               environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
                               not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
                               )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
                               tries=180,
                               try_sleep=10,
                               user="hdfs",
@@ -890,9 +890,8 @@ class TestNamenode(RMFTestCase):
                               )
     self.assertNoMoreResources()
     self.assertTrue(call_mocks.called)
-    self.assertEqual(3, call_mocks.call_count)
+    self.assertEqual(2, call_mocks.call_count)
     calls = [
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')]
     call_mocks.assert_has_calls(calls, any_order=True)


Mime
View raw message