ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nc...@apache.org
Subject [09/50] [abbrv] ambari git commit: AMBARI-13913. Express Upgrade: didn't finalize HDFS, improve robustness for HA (alejandro)
Date Wed, 18 Nov 2015 16:51:05 GMT
AMBARI-13913. Express Upgrade: didn't finalize HDFS, improve robustness for HA (alejandro)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/644d8ba4
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/644d8ba4
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/644d8ba4

Branch: refs/heads/branch-dev-patch-upgrade
Commit: 644d8ba4bb654bd6c8cdaafc4f906bfae7b0a523
Parents: 55d0b18
Author: Alejandro Fernandez <afernandez@hortonworks.com>
Authored: Fri Nov 13 13:23:29 2015 -0800
Committer: Alejandro Fernandez <afernandez@hortonworks.com>
Committed: Mon Nov 16 17:26:49 2015 -0800

----------------------------------------------------------------------
 .../HDFS/2.1.0.2.0/package/scripts/namenode.py  |  9 ++++-
 .../package/scripts/namenode_upgrade.py         | 38 ++++++++++----------
 .../HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 .../HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml |  2 +-
 .../HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 .../HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 6 files changed, 31 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
index 1fada76..2d27724 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
@@ -178,6 +178,7 @@ class NameNodeDefault(NameNode):
     """
     During NonRolling (aka Express Upgrade), after starting NameNode, which is still in safemode, and then starting
     all of the DataNodes, we need for NameNode to receive all of the block reports and leave safemode.
+    If HA is present, then this command will run individually on each NameNode, which checks for its own address.
     """
     import params
 
@@ -190,7 +191,13 @@ class NameNodeDefault(NameNode):
     try:
       hdfs_binary = self.get_hdfs_binary()
       # Note, this fails if namenode_address isn't prefixed with "params."
-      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode get | grep 'Safe mode is OFF'")
+
+      is_namenode_safe_mode_off = ""
+      if params.dfs_ha_enabled:
+        is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs hdfs://{params.namenode_rpc} -safemode get | grep 'Safe mode is OFF'")
+      else:
+        is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode get | grep 'Safe mode is OFF'")
+
       # Wait up to 30 mins
       Execute(is_namenode_safe_mode_off,
               tries=180,

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
index f8a327f..4873b47 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
@@ -76,10 +76,11 @@ def prepare_upgrade_enter_safe_mode(hdfs_binary):
     # Safe to call if already in Safe Mode
     desired_state = SafeMode.ON
     safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, desired_state, params.dfs_ha_enabled, hdfs_binary)
+    Logger.info("Transition successful: {0}, original state: {1}".format(str(safemode_transition_successful), str(original_state)))
     if not safemode_transition_successful:
       raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(desired_state))
   except Exception, e:
-    message = format("Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}")
+    message = "Could not enter safemode. Error: {0}. As the HDFS user, call this command: {1}".format(str(e), safe_mode_enter_cmd)
     Logger.error(message)
     raise Fail(message)
 
@@ -95,7 +96,7 @@ def prepare_upgrade_save_namespace(hdfs_binary):
     Logger.info("Checkpoint the current namespace.")
     as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
   except Exception, e:
-    message = format("Could save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
+    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
     Logger.error(message)
     raise Fail(message)
 
@@ -166,16 +167,22 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
   import params
   original_state = SafeMode.UNKNOWN
 
-  hostname = params.hostname
-  safemode_check = format("{hdfs_binary} dfsadmin -safemode get")
+  safemode_base_command = ""
+  if params.dfs_ha_enabled:
+    safemode_base_command = format("{hdfs_binary} dfsadmin -fs hdfs://{params.namenode_rpc} -safemode ")
+  else:
+    safemode_base_command = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode ")
+  safemode_check_cmd = safemode_base_command + " get"
+
+  grep_pattern = format("Safe mode is {safemode_state}")
+  safemode_check_with_grep = format("{safemode_check_cmd} | grep '{grep_pattern}'")
 
-  grep_pattern = format("Safe mode is {safemode_state} in {hostname}") if in_ha else format("Safe mode is {safemode_state}")
-  safemode_check_with_grep = format("hdfs dfsadmin -safemode get | grep '{grep_pattern}'")
-  code, out = shell.call(safemode_check, user=user)
-  Logger.info("Command: %s\nCode: %d." % (safemode_check, code))
+  code, out = shell.call(safemode_check_cmd, user=user, logoutput=True)
+  Logger.info("Command: %s\nCode: %d." % (safemode_check_cmd, code))
   if code == 0 and out is not None:
     Logger.info(out)
-    re_pattern = r"Safe mode is (\S*) in " + hostname.replace(".", "\\.") if in_ha else r"Safe mode is (\S*)"
+    re_pattern = r"Safe mode is (\S*)"
+    Logger.info("Pattern to search: {0}".format(re_pattern))
     m = re.search(re_pattern, out, re.IGNORECASE)
     if m and len(m.groups()) >= 1:
       original_state = m.group(1).upper()
@@ -184,7 +191,7 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
         return (True, original_state)
       else:
         # Make a transition
-        command = "{0} dfsadmin -safemode {1}".format(hdfs_binary, safemode_to_instruction[safemode_state])
+        command = safemode_base_command + safemode_to_instruction[safemode_state]
         Execute(command,
                 user=user,
                 logoutput=True,
@@ -248,15 +255,8 @@ def finalize_upgrade(upgrade_type, hdfs_binary):
     kinit_command = format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}")

     Execute(kinit_command, user=params.hdfs_user, logoutput=True)
 
-  finalize_cmd = ""
-  query_cmd = ""
-  if upgrade_type == "rolling":
-    finalize_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade finalize")
-    query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
-
-  elif upgrade_type == "nonrolling":
-    finalize_cmd = format("{hdfs_binary} dfsadmin -finalizeUpgrade")
-    query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
+  finalize_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade finalize")
+  query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
 
   Execute(query_cmd,
         user=params.hdfs_user,

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
index efc3753..c2e9df4 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
@@ -373,7 +373,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
index fa69e72..950ece1 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
@@ -295,7 +295,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
index 6282fdc..160f0b8 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
@@ -552,7 +552,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
index 798c895..94fe413 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
@@ -336,7 +336,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>


Mime
View raw message