ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mpapirkovs...@apache.org
Subject ambari git commit: AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start. (mpapirkovskyy)
Date Wed, 18 Nov 2015 21:17:40 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk d078af8cb -> 317f49b64


AMBARI-13856. Sometimes when HA is enabled NameNode does not wait to leave safe mode on start.
(mpapirkovskyy)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/317f49b6
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/317f49b6
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/317f49b6

Branch: refs/heads/trunk
Commit: 317f49b64a62279a1b160aa1d944bf0c97f3462b
Parents: d078af8
Author: Myroslav Papirkovskyi <mpapyrkovskyy@hortonworks.com>
Authored: Wed Nov 18 23:17:23 2015 +0200
Committer: Myroslav Papirkovskyi <mpapyrkovskyy@hortonworks.com>
Committed: Wed Nov 18 23:17:35 2015 +0200

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  |  37 ++++++-
 .../python/stacks/2.0.6/HDFS/test_namenode.py   | 111 ++++++++++++++++++-
 2 files changed, 142 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/317f49b6/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index f944b8d..44119ab 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -17,7 +17,7 @@ limitations under the License.
 
 """
 import os.path
-
+import time
 
 from resource_management.core import shell
 from resource_management.core.source import Template
@@ -143,9 +143,8 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
         check_for_safemode_off = True
         msg = "Must wait to leave safemode since High Availability is enabled during a Stack
Upgrade"
       else:
-        # During normal operations, the NameNode is expected to be up.
-        code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If active NN, code
will be 0
-        if code == 0: # active
+        Logger.info("Wait for NameNode to become active.")
+        if is_active_namenode(hdfs_binary): # active
           check_for_safemode_off = True
           msg = "Must wait to leave safemode since High Availability is enabled and this
is the Active NameNode."
         else:
@@ -434,3 +433,33 @@ def bootstrap_standby_namenode(params, use_path=False):
   except Exception as ex:
     Logger.error('Bootstrap standby namenode threw an exception. Reason %s' %(str(ex)))
   return False
+
+
+def is_active_namenode(hdfs_binary):
+  """
+  Checks if current NameNode is active. Waits up to 30 seconds. If other NameNode is active
returns False.
+  :return: True if current NameNode is active, False otherwise
+  """
+  import params
+
+  if params.dfs_ha_enabled:
+    is_active_this_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir}
haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+    is_active_other_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir}
haadmin -getServiceState {other_namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
+
+    for i in range(0, 5):
+      code, out = shell.call(is_active_this_namenode_cmd) # If active NN, code will be 0
+      if code == 0: # active
+        return True
+
+      code, out = shell.call(is_active_other_namenode_cmd) # If other NN is active, code
will be 0
+      if code == 0: # other NN is active
+        return False
+
+      if i < 4: # Do not sleep after last iteration
+        time.sleep(6)
+
+    Logger.info("Active NameNode is not found.")
+    return False
+
+  else:
+    return True

http://git-wip-us.apache.org/repos/asf/ambari/blob/317f49b6/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index 353d91f..6f8dc32 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -21,6 +21,7 @@ from ambari_commons import OSCheck
 import json
 import os
 import tempfile
+import time
 from stacks.utils.RMFTestCase import *
 from mock.mock import MagicMock, patch, call
 import resource_management
@@ -481,6 +482,112 @@ class TestNamenode(RMFTestCase):
     )
     self.assertNoMoreResources()
 
+  @patch.object(shell, "call")
+  @patch.object(time, "sleep")
+  def test_start_ha_default_active_with_retry(self, sleep_mock, call_mocks):
+    call_mocks = MagicMock()
+    call_mocks.side_effect = [(1, None), (1, None), (1, None), (1, None), (0, None)]
+
+    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
+                       classname = "NameNode",
+                       command = "start",
+                       config_file = "ha_default.json",
+                       hdp_stack_version = self.STACK_VERSION,
+                       target = RMFTestCase.TARGET_COMMON_SERVICES,
+                       call_mocks = call_mocks
+    )
+    self.assert_configure_default()
+    self.assertResourceCalled('File', '/etc/hadoop/conf/dfs.exclude',
+                              owner = 'hdfs',
+                              content = Template('exclude_hosts_list.j2'),
+                              group = 'hadoop',
+                              )
+    self.assertResourceCalled('Directory', '/var/run/hadoop',
+                              owner = 'hdfs',
+                              group = 'hadoop',
+                              mode = 0755
+                              )
+    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
+        action = ['delete'],
+        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid
&& ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
+    )
+    self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit
-c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start namenode'",
+        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
+        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid
&& ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
+    )
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 -safemode get | grep
'Safe mode is OFF'",
+        tries=180,
+        try_sleep=10,
+        user="hdfs",
+        logoutput=True
+    )
+    self.assertResourceCalled('HdfsResource', '/tmp',
+        security_enabled = False,
+        only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin
; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+        keytab = UnknownConfigurationMock(),
+        hadoop_bin_dir = '/usr/bin',
+        default_fs = 'hdfs://ns1',
+        hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+        kinit_path_local = '/usr/bin/kinit',
+        principal_name = None,
+        user = 'hdfs',
+        dfs_type = '',
+        owner = 'hdfs',
+        hadoop_conf_dir = '/etc/hadoop/conf',
+        type = 'directory',
+        action = ['create_on_execute'],
+        mode = 0777,
+    )
+    self.assertResourceCalled('HdfsResource', '/user/ambari-qa',
+        security_enabled = False,
+        only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin
; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+        keytab = UnknownConfigurationMock(),
+        hadoop_bin_dir = '/usr/bin',
+        default_fs = 'hdfs://ns1',
+        hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+        kinit_path_local = '/usr/bin/kinit',
+        principal_name = None,
+        user = 'hdfs',
+        dfs_type = '',
+        owner = 'ambari-qa',
+        hadoop_conf_dir = '/etc/hadoop/conf',
+        type = 'directory',
+        action = ['create_on_execute'],
+        mode = 0770,
+    )
+    self.assertResourceCalled('HdfsResource', None,
+        security_enabled = False,
+        only_if = "ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin
; hdfs --config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'",
+        keytab = UnknownConfigurationMock(),
+        hadoop_bin_dir = '/usr/bin',
+        default_fs = 'hdfs://ns1',
+        hdfs_site = self.getConfig()['configurations']['hdfs-site'],
+        kinit_path_local = '/usr/bin/kinit',
+        principal_name = None,
+        user = 'hdfs',
+        dfs_type = '',
+        action = ['execute'],
+        hadoop_conf_dir = '/etc/hadoop/conf',
+    )
+    self.assertNoMoreResources()
+    self.assertTrue(call_mocks.called)
+    self.assertEqual(5, call_mocks.call_count)
+    calls = [
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
+        call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn1 | grep active'")]
+    call_mocks.assert_has_calls(calls)
+
   def test_start_ha_secured(self):
     self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py",
                        classname = "NameNode",
@@ -783,7 +890,7 @@ class TestNamenode(RMFTestCase):
     self.assertEqual(2, call_mocks.call_count)
     calls = [
       call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs'),
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True)]
+      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'")]
     call_mocks.assert_has_calls(calls, any_order=False)
 
   # tests namenode start command when NameNode HA is enabled, and
@@ -892,7 +999,7 @@ class TestNamenode(RMFTestCase):
     self.assertTrue(call_mocks.called)
     self.assertEqual(3, call_mocks.call_count)
     calls = [
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'", logoutput=True),
+      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  PATH=/bin:/usr/bin ; hdfs
--config /etc/hadoop/conf haadmin -getServiceState nn2 | grep active'"),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')]
     call_mocks.assert_has_calls(calls, any_order=True)


Mime
View raw message