ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mithm...@apache.org
Subject ambari git commit: AMBARI-16646: Set vm.overcommit_memory dynamically for HAWQ (mithmatt)
Date Fri, 13 May 2016 22:10:30 GMT
Repository: ambari
Updated Branches:
  refs/heads/branch-2.4 9f124b8a4 -> 5fc37db51


AMBARI-16646: Set vm.overcommit_memory dynamically for HAWQ (mithmatt)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/5fc37db5
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/5fc37db5
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/5fc37db5

Branch: refs/heads/branch-2.4
Commit: 5fc37db5142bab3ff8298be0fe4653076362c0b7
Parents: 9f124b8
Author: Matt <mmathew@pivotal.io>
Authored: Fri May 13 15:09:52 2016 -0700
Committer: Matt <mmathew@pivotal.io>
Committed: Fri May 13 15:09:52 2016 -0700

----------------------------------------------------------------------
 .../HAWQ/2.0.0/service_advisor.py               |  28 +++--
 .../main/resources/stacks/service_advisor.py    |   3 +-
 .../stacks/2.3/HAWQ/test_service_advisor.py     | 123 +++++++++++++++++++
 .../stacks/2.3/common/test_stack_advisor.py     |  39 +++++-
 4 files changed, 180 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/5fc37db5/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
index a26a398..276dd3a 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
@@ -120,11 +120,15 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor):
     # Set dfs.allow.truncate to true
     putHdfsSiteProperty('dfs.allow.truncate', 'true')
 
-    if any(x in services["configurations"] for x in ["hawq-site", "hdfs-client"]):
+    if any(x in services["configurations"] for x in ["hawq-site", "hdfs-client", "hawq-sysctl-env"]):
       componentsListList = [service["components"] for service in services["services"]]
       componentsList = [item["StackServiceComponents"] for sublist in componentsListList
for item in sublist]
       servicesList = [service["StackServices"]["service_name"] for service in services["services"]]
-      numSegments = len(self.getHosts(componentsList, "HAWQSEGMENT"))
+      hawqMasterHosts = set(self.getHosts(componentsList, "HAWQMASTER")).union(set(self.getHosts(componentsList,
"HAWQSTANDBY")))
+      hawqSegmentHosts = set(self.getHosts(componentsList, "HAWQSEGMENT"))
+      hawqHosts = hawqMasterHosts.union(hawqSegmentHosts)
+      numSegments = len(hawqSegmentHosts)
+      minHawqHostsMemory = min([host['Hosts']['total_mem'] for host in hosts['items'] if
host['Hosts']['host_name'] in hawqHosts])
 
     if "hawq-site" in services["configurations"]:
       hawq_site = services["configurations"]["hawq-site"]["properties"]
@@ -157,11 +161,21 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor):
           if hs_prop in hawq_site and ys_prop in yarn_site:
             putHawqSiteProperty(hs_prop, yarn_site[ys_prop])
 
+    # set vm.overcommit_memory to 2 if the minimum memory among all hawqHosts is at least 32GB
+    if "hawq-sysctl-env" in services["configurations"]:
+      MEM_THRESHOLD = 33554432 # 32GB, minHawqHostsMemory is represented in kB
+      hawq_sysctl_env = services["configurations"]["hawq-sysctl-env"]["properties"]
+      if "vm.overcommit_memory" in hawq_sysctl_env:
+        propertyValue = "2" if minHawqHostsMemory >= MEM_THRESHOLD else "1"
+        putHawqSysctlEnvProperty = self.putProperty(configurations, "hawq-sysctl-env", services)
+        putHawqSysctlEnvProperty("vm.overcommit_memory", propertyValue)
+
     # set output.replace-datanode-on-failure in HAWQ hdfs-client depending on the cluster
size
     if "hdfs-client" in services["configurations"]:
+      MIN_NUM_SEGMENT_THRESHOLD = 3
       hdfs_client = services["configurations"]["hdfs-client"]["properties"]
       if "output.replace-datanode-on-failure" in hdfs_client:
-        propertyValue = "true" if numSegments > 3 else "false"
+        propertyValue = "true" if numSegments > MIN_NUM_SEGMENT_THRESHOLD else "false"
         putHdfsClientProperty = self.putProperty(configurations, "hdfs-client", services)
         putHdfsClientProperty("output.replace-datanode-on-failure", propertyValue)
 
@@ -272,13 +286,13 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor):
       numSegments = len(self.getHosts(componentsList, "HAWQSEGMENT"))
 
       message = None
-      limit = 3
-      if numSegments > limit and value != 'TRUE':
+      MIN_NUM_SEGMENT_THRESHOLD = 3
+      if numSegments > MIN_NUM_SEGMENT_THRESHOLD and value != 'TRUE':
         message = "{0} should be set to true (checked) for clusters with more than {1} HAWQ
Segments"
-      elif numSegments <= limit and value != 'FALSE':
+      elif numSegments <= MIN_NUM_SEGMENT_THRESHOLD and value != 'FALSE':
         message = "{0} should be set to false (unchecked) for clusters with {1} or less HAWQ
Segments"
 
       if message:
-        validationItems.append({"config-name": PROP_NAME, "item": self.getWarnItem(message.format(PROP_NAME,
str(limit)))})
+        validationItems.append({"config-name": PROP_NAME, "item": self.getWarnItem(message.format(PROP_NAME,
str(MIN_NUM_SEGMENT_THRESHOLD)))})
 
     return stackAdvisor.toConfigurationValidationProblems(validationItems, "hdfs-client")

http://git-wip-us.apache.org/repos/asf/ambari/blob/5fc37db5/ambari-server/src/main/resources/stacks/service_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/service_advisor.py b/ambari-server/src/main/resources/stacks/service_advisor.py
index 86682c5..3d6c293 100644
--- a/ambari-server/src/main/resources/stacks/service_advisor.py
+++ b/ambari-server/src/main/resources/stacks/service_advisor.py
@@ -172,7 +172,8 @@ class ServiceAdvisor(object):
   Returns the hosts which are running the given component.
   """
   def getHosts(self, componentsList, componentName):
-    return [component["hostnames"] for component in componentsList if component["component_name"]
== componentName][0]
+    hostNamesList = [component["hostnames"] for component in componentsList if component["component_name"]
== componentName]
+    return hostNamesList[0] if len(hostNamesList) > 0 else []
 
   """
   Utility method for setting a configuration property value.

http://git-wip-us.apache.org/repos/asf/ambari/blob/5fc37db5/ambari-server/src/test/python/stacks/2.3/HAWQ/test_service_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.3/HAWQ/test_service_advisor.py b/ambari-server/src/test/python/stacks/2.3/HAWQ/test_service_advisor.py
new file mode 100644
index 0000000..50f3a1f
--- /dev/null
+++ b/ambari-server/src/test/python/stacks/2.3/HAWQ/test_service_advisor.py
@@ -0,0 +1,123 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+from unittest import TestCase
+
+
+class TestHAWQ200ServiceAdvisor(TestCase):
+
+  def setUp(self):
+    import imp
+    self.testDirectory = os.path.dirname(os.path.abspath(__file__))
+    stackAdvisorPath = os.path.join(self.testDirectory, '../../../../../main/resources/stacks/stack_advisor.py')
+    hawq200ServiceAdvisorPath = os.path.join(self.testDirectory, '../../../../../main/resources/common-services/HAWQ/2.0.0/service_advisor.py')
+
+    with open(stackAdvisorPath, 'rb') as fp:
+      stack_advisor = imp.load_module('stack_advisor', fp, stackAdvisorPath, ('.py', 'rb',
imp.PY_SOURCE))
+    with open(hawq200ServiceAdvisorPath, 'rb') as fp:
+      service_advisor = imp.load_module('stack_advisor_impl', fp, hawq200ServiceAdvisorPath,
('.py', 'rb', imp.PY_SOURCE))
+
+    stackAdvisorClass = getattr(stack_advisor, 'StackAdvisor')
+    self.stackAdvisor = stackAdvisorClass()
+
+    serviceAdvisorClass = getattr(service_advisor, 'HAWQ200ServiceAdvisor')
+    self.serviceAdvisor = serviceAdvisorClass()
+
+  def test_getServiceConfigurationRecommendations(self):
+
+    configurations = {
+      "hawq-sysctl-env": {
+        "properties": {
+          "vm.overcommit_memory": "1"
+        }
+      }
+    }
+
+    services = {
+      "services": [
+        {
+          "StackServices": {
+            "service_name": "HAWQ",
+            "service_version": "2.0",
+            "stack_name": "HDP",
+            "stack_version": "2.3"
+          },
+          "components": [
+            {
+              "StackServiceComponents": {
+                "component_name": "HAWQMASTER",
+                "hostnames": [
+                  "c6401.ambari.apache.org"
+                ]
+              }
+            },
+            {
+              "StackServiceComponents": {
+                "component_name": "HAWQSEGMENT",
+                "hostnames": [
+                  "c6402.ambari.apache.org",
+                  "c6404.ambari.apache.org",
+                ]
+              }
+            }
+          ]
+        }
+      ],
+      "configurations": configurations
+    }
+
+    hosts = {
+      "items": [
+        {
+          "Hosts": {
+            "host_name": "c6401.ambari.apache.org",
+            "total_mem": 33554432
+          }
+        },
+        {
+          "Hosts": {
+            "host_name": "c6402.ambari.apache.org",
+            "total_mem": 33554433
+          }
+        },
+        {
+          "Hosts": {
+            "host_name": "c6403.ambari.apache.org",
+            "total_mem": 33554434
+          }
+        },
+        {
+          "Hosts": {
+            "host_name": "c6404.ambari.apache.org",
+            "total_mem": 33554435
+          }
+        }
+      ]
+    }
+
+    ## Test if vm.overcommit_memory is set correctly
+
+    # Case 1: All HAWQ machines have total_mem of at least 32GB (total_mem >= 33554432)
+    self.serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
None, services, hosts)
+    self.assertEquals(configurations["hawq-sysctl-env"]["properties"]["vm.overcommit_memory"],
"2")
+
+    # Case 2: One machine has total_mem below 32GB
+    hosts["items"][0]["Hosts"]["total_mem"] = 33554431
+    self.serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
None, services, hosts)
+    self.assertEquals(configurations["hawq-sysctl-env"]["properties"]["vm.overcommit_memory"],
"1")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/5fc37db5/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
index be0f3e4..6981205 100644
--- a/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
@@ -1989,6 +1989,35 @@ class TestHDP23StackAdvisor(TestCase):
 
   def test_recommendHAWQConfigurations(self):
 
+    hosts = {
+      "items": [
+        {
+          "Hosts": {
+            "host_name": "c6401.ambari.apache.org",
+            "total_mem": 12345678
+          }
+        },
+        {
+          "Hosts": {
+            "host_name": "c6402.ambari.apache.org",
+            "total_mem": 12345678
+          }
+        },
+        {
+          "Hosts": {
+            "host_name": "c6403.ambari.apache.org",
+            "total_mem": 12345678
+          }
+        },
+        {
+          "Hosts": {
+            "host_name": "c6404.ambari.apache.org",
+            "total_mem": 12345678
+          }
+        }
+      ]
+    }
+
     # original cluster data with 3 segments
     services = self.load_json("services-normal-hawq-3-hosts.json")
     componentsListList = [service["components"] for service in services["services"]]
@@ -2012,7 +2041,7 @@ class TestHDP23StackAdvisor(TestCase):
     # Test 1 - with 3 segments
     self.assertEquals(len(hawqSegmentComponent["hostnames"]), 3)
     serviceAdvisor = self.createHAWQServiceAdvisor()
-    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, None)
+    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, hosts)
     self.assertEquals(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
str(3 * 6))
     self.assertEquals(configurations["hdfs-client"]["properties"]["output.replace-datanode-on-failure"],
"false")
 
@@ -2022,19 +2051,19 @@ class TestHDP23StackAdvisor(TestCase):
 
     # Test 2 - with 100 segments
     hawqSegmentComponent["hostnames"] = ["host" + str(i) for i in range(100)]
-    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, None)
+    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, hosts)
     self.assertEquals(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
str(100 * 5))
     self.assertEquals(configurations["hdfs-client"]["properties"]["output.replace-datanode-on-failure"],
"true")
 
     # Test 3 - with 512 segments
     hawqSegmentComponent["hostnames"] = ["host" + str(i) for i in range(512)]
-    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, None)
+    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, hosts)
     self.assertEquals(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
"512")
     self.assertEquals(configurations["hdfs-client"]["properties"]["output.replace-datanode-on-failure"],
"true")
 
     # Test 4 - with 513 segments
     hawqSegmentComponent["hostnames"] = ["host" + str(i) for i in range(513)]
-    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, None)
+    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, hosts)
     self.assertEquals(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
"512")
     self.assertEquals(configurations["hdfs-client"]["properties"]["output.replace-datanode-on-failure"],
"true")
 
@@ -2042,7 +2071,7 @@ class TestHDP23StackAdvisor(TestCase):
     configurations = {}
     services["configurations"]["hawq-site"] = {"properties":{'hawq-site': {'properties':
{}}}}
     hawqSegmentComponent["hostnames"] = []
-    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, None)
+    serviceAdvisor.getServiceConfigurationRecommendations(self.stackAdvisor, configurations,
clusterData, services, hosts)
     self.assertEquals(configurations, {'hdfs-client': {'properties': {'output.replace-datanode-on-failure':
'false'}},
                                        'hawq-site': {'properties': {}},  'hdfs-site': {'properties':
{'dfs.allow.truncate': 'true'}}})
 


Mime
View raw message