ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jonathanhur...@apache.org
Subject [1/2] ambari git commit: AMBARI-17380 - The DataNode Unmounted Alert Produces False Alerts When file:// URIs Are Used (jonathanhurley)
Date Thu, 23 Jun 2016 02:08:38 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk 91b39b0f3 -> 193baea17


AMBARI-17380 - The DataNode Unmounted Alert Produces False Alerts When file:// URIs Are Used
(jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/193baea1
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/193baea1
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/193baea1

Branch: refs/heads/trunk
Commit: 193baea1774f1381e994dd8cf8d7d2eba4ec3fe2
Parents: e22b3d4
Author: Jonathan Hurley <jhurley@hortonworks.com>
Authored: Wed Jun 22 17:09:19 2016 -0400
Committer: Jonathan Hurley <jhurley@hortonworks.com>
Committed: Wed Jun 22 22:08:24 2016 -0400

----------------------------------------------------------------------
 .../alerts/alert_datanode_unmounted_data_dir.py | 41 +++++++++++++-------
 .../test_alert_datanode_unmounted_data_dir.py   | 34 +++++++++++++---
 2 files changed, 56 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/193baea1/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
index df85002..765831d 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
@@ -20,6 +20,7 @@ limitations under the License.
 
 import os
 import logging
+import urlparse
 
 from resource_management.libraries.functions import file_system
 from resource_management.libraries.functions import mounted_dirs_helper
@@ -52,6 +53,11 @@ def execute(configurations={}, parameters={}, host_name=None):
   configurations (dictionary): a mapping of configuration key to value
   parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
+
+  DataNode directories can be of the following formats and each needs to be supported:
+    /grid/dn/archive0
+    [SSD]/grid/dn/archive0
+    [ARCHIVE]file:///grid/dn/archive0
   """
   warnings = []
   errors = []
@@ -68,33 +74,40 @@ def execute(configurations={}, parameters={}, host_name=None):
   if dfs_data_dir is None:
     return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DFS_DATA_DIR)])
 
-  data_dir_mount_file_exists = True
   # This follows symlinks and will return False for a broken link (even in the middle of the linked list)
+  data_dir_mount_file_exists = True
   if not os.path.exists(DATA_DIR_MOUNT_FILE):
     data_dir_mount_file_exists = False
-    warnings.append("File not found, {0} .".format(DATA_DIR_MOUNT_FILE))
+    warnings.append("{0} was not found.".format(DATA_DIR_MOUNT_FILE))
 
-  valid_data_dirs = set()            # data dirs that have been normalized
+  normalized_data_dirs = set()            # data dirs that have been normalized
   data_dirs_not_exist = set()        # data dirs that do not exist
   data_dirs_unknown = set()          # data dirs for which could not determine mount
   data_dirs_on_root = set()          # set of data dirs that are on root mount
   data_dirs_on_mount = set()         # set of data dirs that are mounted on a device
   data_dirs_unmounted = []           # list of data dirs that are known to have become unmounted
 
+  # transform each data directory into something that we can use
   for data_dir in dfs_data_dir.split(","):
     if data_dir is None or data_dir.strip() == "":
       continue
+
     data_dir = data_dir.strip()
+
     # filter out data storage tags
     for tag in DATA_STORAGE_TAGS:
       if data_dir.startswith(tag):
         data_dir = data_dir.replace(tag, "")
         continue
-    valid_data_dirs.add(data_dir)
+
+    # parse the path in case it contains a URI scheme
+    data_dir = urlparse.urlparse(data_dir).path
+
+    normalized_data_dirs.add(data_dir)
 
   # Sort the data dirs, which is needed for deterministic behavior when running the unit tests.
-  valid_data_dirs = sorted(valid_data_dirs)
-  for data_dir in valid_data_dirs:
+  normalized_data_dirs = sorted(normalized_data_dirs)
+  for data_dir in normalized_data_dirs:
     # This follows symlinks and will return False for a broken link (even in the middle of the linked list)
     if os.path.isdir(data_dir):
       curr_mount_point = file_system.get_mount_point_for_dir(data_dir)
@@ -111,16 +124,16 @@ def execute(configurations={}, parameters={}, host_name=None):
       data_dirs_not_exist.add(data_dir)
 
   # To keep the messages consistent for all hosts, sort the sets into lists
-  valid_data_dirs = sorted(valid_data_dirs)
+  normalized_data_dirs = sorted(normalized_data_dirs)
   data_dirs_not_exist = sorted(data_dirs_not_exist)
   data_dirs_unknown = sorted(data_dirs_unknown)
   data_dirs_on_root = sorted(data_dirs_on_root)
 
   if data_dirs_not_exist:
-    errors.append("Data dir(s) not found: {0} .".format(", ".join(data_dirs_not_exist)))
+    errors.append("The following data dir(s) were not found: {0}\n".format("\n".join(data_dirs_not_exist)))
 
   if data_dirs_unknown:
-    errors.append("Cannot find mount point for data dir(s): {0} .".format(", ".join(data_dirs_unknown)))
+    errors.append("Cannot find the mount point for the following data dir(s):\n{0}".format("\n".join(data_dirs_unknown)))
 
   if data_dir_mount_file_exists:
     # This dictionary contains the expected values of <data_dir, mount_point>
@@ -135,13 +148,13 @@ def execute(configurations={}, parameters={}, host_name=None):
         data_dirs_unmounted.append(data_dir)
 
     if len(data_dirs_unmounted) > 0:
-      errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition: {0} .".format(", ".join(data_dirs_unmounted)))
+      errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition:\n{0}".format("\n".join(data_dirs_unmounted)))
   else:
     # Couldn't make guarantees about the expected value of mount points, so rely on this strategy that is likely to work.
     # It will report false positives (aka false alarms) if the user actually intended to have
     # 1+ data dirs on a mount and 1+ data dirs on the root partition.
     if len(data_dirs_on_mount) >= 1 and len(data_dirs_on_root) >= 1:
-      errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition: {0} .".format(", ".join(data_dirs_on_root)))
+      errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition:\n{0}".format("\n".join(data_dirs_on_root)))
 
   # Determine the status based on warnings and errors.
   if len(errors) == 0:
@@ -153,10 +166,10 @@ def execute(configurations={}, parameters={}, host_name=None):
       status = RESULT_STATE_WARNING
       messages += warnings
 
-    if len(valid_data_dirs) > 0:
-      messages.append("Data dir(s) are fine, {0} .".format(", ".join(valid_data_dirs)))
+    if len(normalized_data_dirs) > 0:
+      messages.append("The following data dir(s) are valid:\n{0}".format("\n".join(normalized_data_dirs)))
     else:
-      messages.append("No data dirs to analyze.")
+      messages.append("There are no data directories to analyze.")
 
     return (status, ["\n".join(messages)])
   else:

http://git-wip-us.apache.org/repos/asf/ambari/blob/193baea1/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
index c9bd187..c7dd47c 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
@@ -91,7 +91,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
     [status, messages] = alert.execute(configurations=configs)
     self.assertEqual(status, RESULT_STATE_WARNING)
     self.assertTrue(messages is not None and len(messages) == 1)
-    self.assertTrue("File not found, {0}".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
+    self.assertTrue("{0} was not found".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
 
   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -117,7 +117,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
     [status, messages] = alert.execute(configurations=configs)
     self.assertEqual(status, RESULT_STATE_OK)
     self.assertTrue(messages is not None and len(messages) == 1)
-    self.assertTrue("Data dir(s) are fine" in messages[0])
+    self.assertTrue("The following data dir(s) are valid" in messages[0])
 
   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -142,7 +142,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
     [status, messages] = alert.execute(configurations=configs)
     self.assertEqual(status, RESULT_STATE_OK)
     self.assertTrue(messages is not None and len(messages) == 1)
-    self.assertTrue("Data dir(s) are fine" in messages[0])
+    self.assertTrue("The following data dir(s) are valid" in messages[0])
 
   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -166,7 +166,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
     [status, messages] = alert.execute(configurations=configs)
     self.assertEqual(status, RESULT_STATE_CRITICAL)
     self.assertTrue(messages is not None and len(messages) == 1)
-    self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition: /grid/0/data, /grid/1/data" in messages[0])
+    self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition:\n/grid/0/data\n/grid/1/data" in messages[0])
 
   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -193,4 +193,28 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
     [status, messages] = alert.execute(configurations=configs)
     self.assertEqual(status, RESULT_STATE_CRITICAL)
     self.assertTrue(messages is not None and len(messages) == 1)
-    self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition: /grid/1/data ." in messages[0])
\ No newline at end of file
+    self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition:\n/grid/1/data" in messages[0])
+
+
+  @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
+  @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+  @patch("os.path.exists")
+  @patch("os.path.isdir")
+  def test_file_uri_and_meta_tags(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
+    """
+    Test that the status is OK when the locations include file:// schemes and meta tags.
+    """
+    configs = {
+      "{{hdfs-site/dfs.datanode.data.dir}}":"[SSD]file:///grid/0/data"
+    }
+
+    # Mock calls
+    exists_mock.return_value = True
+    is_dir_mock.return_value = True
+    get_mount_mock.return_value = "/"
+    get_data_dir_to_mount_from_file_mock.return_value = {"/grid/0/data":"/"}
+
+    [status, messages] = alert.execute(configurations = configs)
+    self.assertEqual(status, RESULT_STATE_OK)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual("The following data dir(s) are valid:\n/grid/0/data", messages[0])
\ No newline at end of file


Mime
View raw message