ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alejan...@apache.org
Subject ambari git commit: AMBARI-13913. Express Upgrade: didn't finalize HDFS, improve robustness for HA (alejandro)
Date Tue, 17 Nov 2015 01:29:27 GMT
Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 ea0a4bed7 -> 8140a0d09


AMBARI-13913. Express Upgrade: didn't finalize HDFS, improve robustness for HA (alejandro)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/8140a0d0
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/8140a0d0
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/8140a0d0

Branch: refs/heads/branch-2.1
Commit: 8140a0d09b72f8eb3185764a970b263245da3ce9
Parents: ea0a4be
Author: Alejandro Fernandez <afernandez@hortonworks.com>
Authored: Mon Nov 16 17:29:02 2015 -0800
Committer: Alejandro Fernandez <afernandez@hortonworks.com>
Committed: Mon Nov 16 17:29:20 2015 -0800

----------------------------------------------------------------------
 .../HDFS/2.1.0.2.0/package/scripts/namenode.py  |  9 ++++-
 .../package/scripts/namenode_upgrade.py         | 38 ++++++++++----------
 .../HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 .../HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml |  2 +-
 .../HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 .../HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 6 files changed, 31 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/8140a0d0/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
index 5dfb01e..9800ff1 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
@@ -178,6 +178,7 @@ class NameNodeDefault(NameNode):
     """
     During NonRolling (aka Express Upgrade), after starting NameNode, which is still in safemode, and then starting
     all of the DataNodes, we need for NameNode to receive all of the block reports and leave safemode.
+    If HA is present, then this command will run individually on each NameNode, which checks for its own address.
     """
     import params
 
@@ -190,7 +191,13 @@ class NameNodeDefault(NameNode):
     try:
       hdfs_binary = self.get_hdfs_binary()
       # Note, this fails if namenode_address isn't prefixed with "params."
-      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode get | grep 'Safe mode is OFF'")
+
+      is_namenode_safe_mode_off = ""
+      if params.dfs_ha_enabled:
+        is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs hdfs://{params.namenode_rpc} -safemode get | grep 'Safe mode is OFF'")
+      else:
+        is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode get | grep 'Safe mode is OFF'")
+
       # Wait up to 30 mins
       Execute(is_namenode_safe_mode_off,
               tries=180,

http://git-wip-us.apache.org/repos/asf/ambari/blob/8140a0d0/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
index f8a327f..4873b47 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
@@ -76,10 +76,11 @@ def prepare_upgrade_enter_safe_mode(hdfs_binary):
     # Safe to call if already in Safe Mode
     desired_state = SafeMode.ON
     safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, desired_state, params.dfs_ha_enabled, hdfs_binary)
+    Logger.info("Transition successful: {0}, original state: {1}".format(str(safemode_transition_successful), str(original_state)))
     if not safemode_transition_successful:
       raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(desired_state))
   except Exception, e:
-    message = format("Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}")
+    message = "Could not enter safemode. Error: {0}. As the HDFS user, call this command: {1}".format(str(e), safe_mode_enter_cmd)
     Logger.error(message)
     raise Fail(message)
 
@@ -95,7 +96,7 @@ def prepare_upgrade_save_namespace(hdfs_binary):
     Logger.info("Checkpoint the current namespace.")
     as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
   except Exception, e:
-    message = format("Could save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
+    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
     Logger.error(message)
     raise Fail(message)
 
@@ -166,16 +167,22 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
   import params
   original_state = SafeMode.UNKNOWN
 
-  hostname = params.hostname
-  safemode_check = format("{hdfs_binary} dfsadmin -safemode get")
+  safemode_base_command = ""
+  if params.dfs_ha_enabled:
+    safemode_base_command = format("{hdfs_binary} dfsadmin -fs hdfs://{params.namenode_rpc} -safemode ")
+  else:
+    safemode_base_command = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode ")
+  safemode_check_cmd = safemode_base_command + " get"
+
+  grep_pattern = format("Safe mode is {safemode_state}")
+  safemode_check_with_grep = format("{safemode_check_cmd} | grep '{grep_pattern}'")
 
-  grep_pattern = format("Safe mode is {safemode_state} in {hostname}") if in_ha else format("Safe mode is {safemode_state}")
-  safemode_check_with_grep = format("hdfs dfsadmin -safemode get | grep '{grep_pattern}'")
-  code, out = shell.call(safemode_check, user=user)
-  Logger.info("Command: %s\nCode: %d." % (safemode_check, code))
+  code, out = shell.call(safemode_check_cmd, user=user, logoutput=True)
+  Logger.info("Command: %s\nCode: %d." % (safemode_check_cmd, code))
   if code == 0 and out is not None:
     Logger.info(out)
-    re_pattern = r"Safe mode is (\S*) in " + hostname.replace(".", "\\.") if in_ha else r"Safe mode is (\S*)"
+    re_pattern = r"Safe mode is (\S*)"
+    Logger.info("Pattern to search: {0}".format(re_pattern))
     m = re.search(re_pattern, out, re.IGNORECASE)
     if m and len(m.groups()) >= 1:
       original_state = m.group(1).upper()
@@ -184,7 +191,7 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
         return (True, original_state)
       else:
         # Make a transition
-        command = "{0} dfsadmin -safemode {1}".format(hdfs_binary, safemode_to_instruction[safemode_state])
+        command = safemode_base_command + safemode_to_instruction[safemode_state]
         Execute(command,
                 user=user,
                 logoutput=True,
@@ -248,15 +255,8 @@ def finalize_upgrade(upgrade_type, hdfs_binary):
     kinit_command = format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}")

     Execute(kinit_command, user=params.hdfs_user, logoutput=True)
 
-  finalize_cmd = ""
-  query_cmd = ""
-  if upgrade_type == "rolling":
-    finalize_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade finalize")
-    query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
-
-  elif upgrade_type == "nonrolling":
-    finalize_cmd = format("{hdfs_binary} dfsadmin -finalizeUpgrade")
-    query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
+  finalize_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade finalize")
+  query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
 
   Execute(query_cmd,
         user=params.hdfs_user,

http://git-wip-us.apache.org/repos/asf/ambari/blob/8140a0d0/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
index efc3753..c2e9df4 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
@@ -373,7 +373,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/8140a0d0/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
index fa69e72..950ece1 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
@@ -295,7 +295,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/8140a0d0/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
index 9d88e84..0dd327d 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
@@ -552,7 +552,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/8140a0d0/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
index 798c895..94fe413 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
@@ -336,7 +336,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>


Mime
View raw message