From: alejandro@apache.org
To: commits@ambari.apache.org
Date: Thu, 08 Dec 2016 23:10:43 -0000
Message-Id: <9c0866f2a02244d6b049571dff3c65a6@git.apache.org>
In-Reply-To: <537d5761456b4173b523fdb894d96c61@git.apache.org>
References: <537d5761456b4173b523fdb894d96c61@git.apache.org>
Subject: [08/20] ambari git commit: AMBARI-19137. HDP 3.0 TP - move ZK, HFDS, YARN/MR into new common-services version (alejandro)

http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/YARN_widgets.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/YARN_widgets.json b/ambari-server/src/main/resources/common-services/YARN/3.0.0/YARN_widgets.json new file mode 100644 index 0000000..782f21d --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/YARN_widgets.json @@ -0,0 +1,670 @@ +{ + "layouts": [ + { + "layout_name": "default_yarn_dashboard", + "display_name": "Standard YARN Dashboard", + "section_name": "YARN_SUMMARY", + "widgetLayoutInfo": [ + { + "widget_name": "Memory Utilization", + "description": "Percentage of total memory allocated to containers running in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AllocatedMB", + "metric_path": "metrics/yarn/Queue/root/AllocatedMB", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AvailableMB", + "metric_path": "metrics/yarn/Queue/root/AvailableMB", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "Memory Utilization", + "value": "${(yarn.QueueMetrics.Queue=root.AllocatedMB / (yarn.QueueMetrics.Queue=root.AllocatedMB + yarn.QueueMetrics.Queue=root.AvailableMB)) * 100}" + } + ], + "properties": { + "display_unit": "%",
+ "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "CPU Utilization", + "description": "Percentage of total virtual cores allocated to containers running in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AllocatedVCores", + "metric_path": "metrics/yarn/Queue/root/AllocatedVCores", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AvailableVCores", + "metric_path": "metrics/yarn/Queue/root/AvailableVCores", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "Total Allocatable CPU Utilized across NodeManager", + "value": "${(yarn.QueueMetrics.Queue=root.AllocatedVCores / (yarn.QueueMetrics.Queue=root.AllocatedVCores + yarn.QueueMetrics.Queue=root.AvailableVCores)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Bad Local Disks", + "description": "Number of unhealthy local disks across all NodeManagers.", + "widget_type": "NUMBER", + "is_visible": true, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.BadLocalDirs", + "metric_path": "metrics/yarn/BadLocalDirs", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.BadLogDirs", + "metric_path": "metrics/yarn/BadLogDirs", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Number of unhealthy local disks for NodeManager", + "value": "${yarn.NodeManagerMetrics.BadLocalDirs + yarn.NodeManagerMetrics.BadLogDirs}" + } + ], + "properties": { + "display_unit": "" + } + }, + { + "widget_name": "Container Failures", + "description": "Percentage of all containers failing in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.ContainersFailed._rate", + "metric_path": "metrics/yarn/ContainersFailed._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersCompleted._rate", + "metric_path": "metrics/yarn/ContainersCompleted._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersLaunched._rate", + "metric_path": "metrics/yarn/ContainersLaunched._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersIniting._sum", + "metric_path": "metrics/yarn/ContainersIniting._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersKilled._rate", + "metric_path": "metrics/yarn/ContainersKilled._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersRunning._sum", + "metric_path": "metrics/yarn/ContainersRunning._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Container Failures", + "value": "${(yarn.NodeManagerMetrics.ContainersFailed._rate/(yarn.NodeManagerMetrics.ContainersFailed._rate + yarn.NodeManagerMetrics.ContainersCompleted._rate + yarn.NodeManagerMetrics.ContainersLaunched._rate + yarn.NodeManagerMetrics.ContainersIniting._sum + yarn.NodeManagerMetrics.ContainersKilled._rate + 
yarn.NodeManagerMetrics.ContainersRunning._sum)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "App Failures", + "description": "Percentage of all launched applications failing in the cluster.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AppsFailed._rate", + "metric_path": "metrics/yarn/Queue/root/AppsFailed._rate", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsKilled._rate", + "metric_path": "metrics/yarn/Queue/root/AppsKilled._rate", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsPending", + "metric_path": "metrics/yarn/Queue/root/AppsPending", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsRunning", + "metric_path": "metrics/yarn/Queue/root/AppsRunning", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsSubmitted._rate", + "metric_path": "metrics/yarn/Queue/root/AppsSubmitted._rate", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + }, + { + "name": "yarn.QueueMetrics.Queue=root.AppsCompleted._rate", + "metric_path": "metrics/yarn/Queue/root/AppsCompleted._rate", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "App Failures", + "value": "${(yarn.QueueMetrics.Queue=root.AppsFailed._rate/(yarn.QueueMetrics.Queue=root.AppsFailed._rate + yarn.QueueMetrics.Queue=root.AppsKilled._rate + yarn.QueueMetrics.Queue=root.AppsPending + yarn.QueueMetrics.Queue=root.AppsRunning + yarn.QueueMetrics.Queue=root.AppsSubmitted._rate + yarn.QueueMetrics.Queue=root.AppsCompleted._rate)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Pending Apps", + "description": "Count of applications waiting for cluster resources to become available.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "yarn.QueueMetrics.Queue=root.AppsPending", + "metric_path": "metrics/yarn/Queue/root/AppsPending", + "service_name": "YARN", + "component_name": "RESOURCEMANAGER", + "host_component_criteria": "host_components/HostRoles/ha_state=ACTIVE" + } + ], + "values": [ + { + "name": "Pending Apps", + "value": "${yarn.QueueMetrics.Queue=root.AppsPending}" + } + ], + "properties": { + "display_unit": "Apps", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster Memory", + "description": "Percentage of memory used across all NodeManager hosts.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "mem_total._sum", + "metric_path": "metrics/memory/mem_total._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "mem_free._sum", + "metric_path": "metrics/memory/mem_free._avg", + "service_name": "YARN", + "component_name": 
"NODEMANAGER" + } + ], + "values": [ + { + "name": "Memory utilization", + "value": "${((mem_total._sum - mem_free._sum)/mem_total._sum) * 100}" + } + ], + "properties": { + "display_unit": "%", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster Disk", + "description": "Sum of disk throughput for all NodeManager hosts.", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "read_bps._sum", + "metric_path": "metrics/disk/read_bps._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "write_bps._sum", + "metric_path": "metrics/disk/write_bps._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Read throughput", + "value": "${read_bps._sum/1048576}" + }, + { + "name": "Write throughput", + "value": "${write_bps._sum/1048576}" + } + ], + "properties": { + "display_unit": "Mbps", + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster Network", + "description": "Average of Network utilized across all NodeManager hosts.", + "default_section_name": "YARN_SUMMARY", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "pkts_in._avg", + "metric_path": "metrics/network/pkts_in._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "pkts_out._avg", + "metric_path": "metrics/network/pkts_out._avg", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Packets In", + "value": "${pkts_in._avg}" + }, + { + "name": "Packets Out", + "value": "${pkts_out._avg}" + } + ], + "properties": { + "graph_type": "LINE", + "time_range": "1" + } + }, + { + "widget_name": "Cluster CPU", + "description": "Percentage of CPU utilized across all NodeManager hosts.", + "default_section_name": "YARN_SUMMARY", + "widget_type": "GRAPH", + "is_visible": true, + "metrics": [ + { + "name": "cpu_system._sum", + "metric_path": "metrics/cpu/cpu_system._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_user._sum", + "metric_path": "metrics/cpu/cpu_user._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_nice._sum", + "metric_path": "metrics/cpu/cpu_nice._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_idle._sum", + "metric_path": "metrics/cpu/cpu_idle._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "cpu_wio._sum", + "metric_path": "metrics/cpu/cpu_wio._sum", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "CPU utilization", + "value": "${((cpu_system._sum + cpu_user._sum + cpu_nice._sum)/(cpu_system._sum + cpu_user._sum + cpu_nice._sum + cpu_idle._sum + cpu_wio._sum)) * 100}" + } + ], + "properties": { + "graph_type": "LINE", + "time_range": "1", + "display_unit": "%" + } + } + ] + }, + { + "layout_name": "default_yarn_heatmap", + "display_name": "YARN Heatmaps", + "section_name": "YARN_HEATMAPS", + "widgetLayoutInfo": [ + { + "widget_name": "YARN local disk space utilization per NodeManager", + "description": "", + "widget_type": "HEATMAP", + "is_visible": true, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc", + "metric_path": "metrics/yarn/GoodLocalDirsDiskUtilizationPerc", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc", + "metric_path": 
"metrics/yarn/GoodLogDirsDiskUtilizationPerc", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "YARN local disk space utilization per NodeManager", + "value": "${(yarn.NodeManagerMetrics.GoodLocalDirsDiskUtilizationPerc + yarn.NodeManagerMetrics.GoodLogDirsDiskUtilizationPerc)/2}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "Total Allocatable RAM Utilized per NodeManager", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedGB", + "metric_path": "metrics/yarn/AllocatedGB", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.AvailableGB", + "metric_path": "metrics/yarn/AvailableGB", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Total Allocatable RAM Utilized per NodeManager", + "value": "${(yarn.NodeManagerMetrics.AllocatedGB/(yarn.NodeManagerMetrics.AvailableGB + yarn.NodeManagerMetrics.AllocatedGB)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "Total Allocatable CPU Utilized per NodeManager", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedVCores", + "metric_path": "metrics/yarn/AllocatedVCores", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.AvailableVCores", + "metric_path": "metrics/yarn/AvailableVCores", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Total Allocatable CPU Utilized per NodeManager", + "value": "${(yarn.NodeManagerMetrics.AllocatedVCores/(yarn.NodeManagerMetrics.AllocatedVCores + yarn.NodeManagerMetrics.AvailableVCores)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "Container Failures", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.ContainersFailed._rate", + "metric_path": "metrics/yarn/ContainersFailed._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersCompleted._rate", + "metric_path": "metrics/yarn/ContainersCompleted._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersLaunched._rate", + "metric_path": "metrics/yarn/ContainersLaunched._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersIniting", + "metric_path": "metrics/yarn/ContainersIniting", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersKilled._rate", + "metric_path": "metrics/yarn/ContainersKilled._rate", + "service_name": "YARN", + "component_name": "NODEMANAGER" + }, + { + "name": "yarn.NodeManagerMetrics.ContainersRunning", + "metric_path": "metrics/yarn/ContainersRunning", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Container Failures", + "value": "${(yarn.NodeManagerMetrics.ContainersFailed._rate/(yarn.NodeManagerMetrics.ContainersFailed._rate + yarn.NodeManagerMetrics.ContainersCompleted._rate + yarn.NodeManagerMetrics.ContainersLaunched._rate + yarn.NodeManagerMetrics.ContainersIniting + 
yarn.NodeManagerMetrics.ContainersKilled._rate + yarn.NodeManagerMetrics.ContainersRunning)) * 100}" + } + ], + "properties": { + "display_unit": "%", + "max_limit": "100" + } + }, + { + "widget_name": "NodeManager GC Time", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "Hadoop:service=NodeManager,name=JvmMetrics.GcTimeMillis", + "metric_path": "metrics/jvm/gcTimeMillis", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager Garbage Collection Time", + "value": "${Hadoop:service=NodeManager,name=JvmMetrics.GcTimeMillis}" + } + ], + "properties": { + "display_unit": "ms", + "max_limit": "10000" + } + }, + { + "widget_name": "NodeManager JVM Heap Memory Used", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "Hadoop:service=NodeManager,name=JvmMetrics.MemHeapUsedM", + "metric_path": "metrics/jvm/memHeapUsedM", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager JVM Heap Memory Used", + "value": "${Hadoop:service=NodeManager,name=JvmMetrics.MemHeapUsedM}" + } + ], + "properties": { + "display_unit": "MB", + "max_limit": "512" + } + }, + { + "widget_name": "Allocated Containers", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedContainers", + "metric_path": "metrics/yarn/AllocatedContainers", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "Allocated Containers", + "value": "${yarn.NodeManagerMetrics.AllocatedContainers}" + } + ], + "properties": { + "display_unit": "", + "max_limit": "100" + } + }, + { + "widget_name": "NodeManager RAM Utilized", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedGB", + "metric_path": "metrics/yarn/AllocatedGB", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager RAM Utilized", + "value": "${yarn.NodeManagerMetrics.AllocatedGB}" + } + ], + "properties": { + "display_unit": "", + "max_limit": "100" + } + }, + { + "widget_name": "NodeManager CPU Utilized", + "description": "", + "widget_type": "HEATMAP", + "is_visible": false, + "metrics": [ + { + "name": "yarn.NodeManagerMetrics.AllocatedVCores", + "metric_path": "metrics/yarn/AllocatedVCores", + "service_name": "YARN", + "component_name": "NODEMANAGER" + } + ], + "values": [ + { + "name": "NodeManager CPU Utilized", + "value": "${yarn.NodeManagerMetrics.AllocatedVCores}" + } + ], + "properties": { + "display_unit": "", + "max_limit": "100" + } + } + ] + } + ] +} http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/alerts.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/alerts.json b/ambari-server/src/main/resources/common-services/YARN/3.0.0/alerts.json new file mode 100644 index 0000000..c4a58bb --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/alerts.json @@ -0,0 +1,392 @@ +{ + "MAPREDUCE2": { + "service": [], + "HISTORYSERVER": [ + { + "name": "mapreduce_history_server_webui", + "label": "History Server Web UI", + "description": "This host-level alert is triggered if the History Server Web UI is unreachable.", + "interval": 
1, + "scope": "ANY", + "source": { + "type": "WEB", + "uri": { + "http": "{{mapred-site/mapreduce.jobhistory.webapp.address}}", + "https": "{{mapred-site/mapreduce.jobhistory.webapp.https.address}}", + "https_property": "{{mapred-site/mapreduce.jobhistory.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "kerberos_keytab": "{{mapred-site/mapreduce.jobhistory.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{mapred-site/mapreduce.jobhistory.webapp.spnego-principal}}", + "connection_timeout": 5.0 + }, + "reporting": { + "ok": { + "text": "HTTP {0} response in {2:.3f}s" + }, + "warning":{ + "text": "HTTP {0} response from {1} in {2:.3f}s ({3})" + }, + "critical": { + "text": "Connection failed to {1} ({3})" + } + } + } + }, + { + "name": "mapreduce_history_server_cpu", + "label": "History Server CPU Utilization", + "description": "This host-level alert is triggered if the percent of CPU utilization on the History Server exceeds the configured critical threshold. The threshold values are in percent.", + "interval": 5, + "scope": "ANY", + "enabled": true, + "source": { + "type": "METRIC", + "uri": { + "http": "{{mapred-site/mapreduce.jobhistory.webapp.address}}", + "kerberos_keytab": "{{mapred-site/mapreduce.jobhistory.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{mapred-site/mapreduce.jobhistory.webapp.spnego-principal}}", + "https": "{{mapred-site/mapreduce.jobhistory.webapp.https.address}}", + "https_property": "{{mapred-site/mapreduce.jobhistory.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "connection_timeout": 5.0 + }, + "reporting": { + "ok": { + "text": "{1} CPU, load {0:.1%}" + }, + "warning": { + "text": "{1} CPU, load {0:.1%}", + "value": 200 + }, + "critical": { + "text": "{1} CPU, load {0:.1%}", + "value": 250 + }, + "units" : "%", + "type": "PERCENT" + }, + "jmx": { + "property_list": [ + "java.lang:type=OperatingSystem/SystemCpuLoad", + "java.lang:type=OperatingSystem/AvailableProcessors" + ], + "value": "{0} * 100" + } + } + }, + { + "name": "mapreduce_history_server_rpc_latency", + "label": "History Server RPC Latency", + "description": "This host-level alert is triggered if the History Server operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations. 
The threshold values are in milliseconds.", + "interval": 5, + "scope": "ANY", + "enabled": true, + "source": { + "type": "METRIC", + "uri": { + "http": "{{mapred-site/mapreduce.jobhistory.webapp.address}}", + "https": "{{mapred-site/mapreduce.jobhistory.webapp.https.address}}", + "kerberos_keytab": "{{mapred-site/mapreduce.jobhistory.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{mapred-site/mapreduce.jobhistory.webapp.spnego-principal}}", + "https_property": "{{mapred-site/mapreduce.jobhistory.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "connection_timeout": 5.0 + }, + "reporting": { + "ok": { + "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]" + }, + "warning": { + "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]", + "value": 3000 + }, + "critical": { + "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]", + "value": 5000 + }, + "units" : "ms" + }, + "jmx": { + "property_list": [ + "Hadoop:service=JobHistoryServer,name=RpcActivityForPort*/RpcQueueTimeAvgTime", + "Hadoop:service=JobHistoryServer,name=RpcActivityForPort*/RpcProcessingTimeAvgTime" + ], + "value": "{0}" + } + } + } + ] + }, + "YARN": { + "service": [ + { + "name": "yarn_nodemanager_webui_percent", + "label": "Percent NodeManagers Available", + "description": "This alert is triggered if the number of down NodeManagers in the cluster is greater than the configured critical threshold. It aggregates the results of NodeManager process checks.", + "interval": 1, + "scope": "SERVICE", + "enabled": true, + "source": { + "type": "AGGREGATE", + "alert_name": "yarn_nodemanager_webui", + "reporting": { + "ok": { + "text": "affected: [{1}], total: [{0}]" + }, + "warning": { + "text": "affected: [{1}], total: [{0}]", + "value": 10 + }, + "critical": { + "text": "affected: [{1}], total: [{0}]", + "value": 30 + }, + "units" : "%", + "type": "PERCENT" + } + } + } + ], + "NODEMANAGER": [ + { + "name": "yarn_nodemanager_webui", + "label": "NodeManager Web UI", + "description": "This host-level alert is triggered if the NodeManager Web UI is unreachable.", + "interval": 1, + "scope": "HOST", + "source": { + "type": "WEB", + "uri": { + "http": "{{yarn-site/yarn.nodemanager.webapp.address}}", + "https": "{{yarn-site/yarn.nodemanager.webapp.https.address}}", + "https_property": "{{yarn-site/yarn.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "default_port": 8042, + "kerberos_keytab": "{{yarn-site/yarn.nodemanager.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{yarn-site/yarn.nodemanager.webapp.spnego-principal}}", + "connection_timeout": 5.0 + }, + "reporting": { + "ok": { + "text": "HTTP {0} response in {2:.3f}s" + }, + "warning":{ + "text": "HTTP {0} response from {1} in {2:.3f}s ({3})" + }, + "critical": { + "text": "Connection failed to {1} ({3})" + } + } + } + }, + { + "name": "yarn_nodemanager_health", + "label": "NodeManager Health", + "description": "This host-level alert checks the node health property available from the NodeManager component.", + "interval": 1, + "scope": "HOST", + "enabled": true, + "source": { + "type": "SCRIPT", + "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py", + "parameters": [ + { + "name": "connection.timeout", + "display_name": "Connection Timeout", + "value": 5.0, + "type": "NUMERIC", + "description": "The maximum time before this alert is considered to be CRITICAL", + "units": "seconds", + "threshold": "CRITICAL" + } + ] + } + } + ], + "RESOURCEMANAGER": [ + { + "name": "yarn_resourcemanager_webui", + 
"label": "ResourceManager Web UI", + "description": "This host-level alert is triggered if the ResourceManager Web UI is unreachable.", + "interval": 1, + "scope": "ANY", + "source": { + "type": "WEB", + "uri": { + "http": "{{yarn-site/yarn.resourcemanager.webapp.address}}", + "https": "{{yarn-site/yarn.resourcemanager.webapp.https.address}}", + "https_property": "{{yarn-site/yarn.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "kerberos_keytab": "{{yarn-site/yarn.resourcemanager.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{yarn-site/yarn.resourcemanager.webapp.spnego-principal}}", + "connection_timeout": 5.0, + "high_availability": { + "alias_key" : "{{yarn-site/yarn.resourcemanager.ha.rm-ids}}", + "http_pattern" : "{{yarn-site/yarn.resourcemanager.webapp.address.{{alias}}}}", + "https_pattern" : "{{yarn-site/yarn.resourcemanager.webapp.https.address.{{alias}}}}" + } + }, + "reporting": { + "ok": { + "text": "HTTP {0} response in {2:.3f}s" + }, + "warning":{ + "text": "HTTP {0} response from {1} in {2:.3f}s ({3})" + }, + "critical": { + "text": "Connection failed to {1} ({3})" + } + } + } + }, + { + "name": "yarn_resourcemanager_cpu", + "label": "ResourceManager CPU Utilization", + "description": "This host-level alert is triggered if CPU utilization of the ResourceManager exceeds certain warning and critical thresholds. It checks the ResourceManager JMX Servlet for the SystemCPULoad property. The threshold values are in percent.", + "interval": 5, + "scope": "ANY", + "enabled": true, + "source": { + "type": "METRIC", + "uri": { + "http": "{{yarn-site/yarn.resourcemanager.webapp.address}}", + "https": "{{yarn-site/yarn.resourcemanager.webapp.https.address}}", + "kerberos_keytab": "{{yarn-site/yarn.resourcemanager.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{yarn-site/yarn.resourcemanager.webapp.spnego-principal}}", + "https_property": "{{yarn-site/yarn.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "connection_timeout": 5.0, + "high_availability": { + "alias_key" : "{{yarn-site/yarn.resourcemanager.ha.rm-ids}}", + "http_pattern" : "{{yarn-site/yarn.resourcemanager.webapp.address.{{alias}}}}", + "https_pattern" : "{{yarn-site/yarn.resourcemanager.webapp.https.address.{{alias}}}}" + } + }, + "reporting": { + "ok": { + "text": "{1} CPU, load {0:.1%}" + }, + "warning": { + "text": "{1} CPU, load {0:.1%}", + "value": 200 + }, + "critical": { + "text": "{1} CPU, load {0:.1%}", + "value": 250 + }, + "units" : "%", + "type": "PERCENT" + }, + "jmx": { + "property_list": [ + "java.lang:type=OperatingSystem/SystemCpuLoad", + "java.lang:type=OperatingSystem/AvailableProcessors" + ], + "value": "{0} * 100" + } + } + }, + { + "name": "yarn_resourcemanager_rpc_latency", + "label": "ResourceManager RPC Latency", + "description": "This host-level alert is triggered if the ResourceManager operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for ResourceManager operations. 
The threshold values are in milliseconds.", + "interval": 5, + "scope": "ANY", + "enabled": true, + "source": { + "type": "METRIC", + "uri": { + "http": "{{yarn-site/yarn.resourcemanager.webapp.address}}", + "https": "{{yarn-site/yarn.resourcemanager.webapp.https.address}}", + "kerberos_keytab": "{{yarn-site/yarn.resourcemanager.webapp.spnego-keytab-file}}", + "kerberos_principal": "{{yarn-site/yarn.resourcemanager.webapp.spnego-principal}}", + "https_property": "{{yarn-site/yarn.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "connection_timeout": 5.0, + "high_availability": { + "alias_key" : "{{yarn-site/yarn.resourcemanager.ha.rm-ids}}", + "http_pattern" : "{{yarn-site/yarn.resourcemanager.webapp.address.{{alias}}}}", + "https_pattern" : "{{yarn-site/yarn.resourcemanager.webapp.https.address.{{alias}}}}" + } + }, + "reporting": { + "ok": { + "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]" + }, + "warning": { + "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]", + "value": 3000 + }, + "critical": { + "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]", + "value": 5000 + }, + "units" : "ms" + }, + "jmx": { + "property_list": [ + "Hadoop:service=ResourceManager,name=RpcActivityForPort*/RpcQueueTimeAvgTime", + "Hadoop:service=ResourceManager,name=RpcActivityForPort*/RpcProcessingTimeAvgTime" + ], + "value": "{0}" + } + } + }, + { + "name": "nodemanager_health_summary", + "label": "NodeManager Health Summary", + "description": "This service-level alert is triggered if there are unhealthy NodeManagers", + "interval": 1, + "scope": "SERVICE", + "enabled": true, + "source": { + "type": "SCRIPT", + "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py", + "parameters": [ + { + "name": "connection.timeout", + "display_name": "Connection Timeout", + "value": 5.0, + "type": "NUMERIC", + "description": "The maximum time before this alert is considered to be CRITICAL", + "units": "seconds", + "threshold": "CRITICAL" + } + ] + } + } + ], + "APP_TIMELINE_SERVER": [ + { + "name": "yarn_app_timeline_server_webui", + "label": "App Timeline Web UI", + "description": "This host-level alert is triggered if the App Timeline Server Web UI is unreachable.", + "interval": 1, + "scope": "ANY", + "source": { + "type": "WEB", + "uri": { + "http": "{{yarn-site/yarn.timeline-service.webapp.address}}/ws/v1/timeline", + "https": "{{yarn-site/yarn.timeline-service.webapp.https.address}}/ws/v1/timeline", + "https_property": "{{yarn-site/yarn.http.policy}}", + "https_property_value": "HTTPS_ONLY", + "kerberos_keytab": "{{yarn-site/yarn.timeline-service.http-authentication.kerberos.keytab}}", + "kerberos_principal": "{{yarn-site/yarn.timeline-service.http-authentication.kerberos.principal}}", + "connection_timeout": 5.0 + }, + "reporting": { + "ok": { + "text": "HTTP {0} response in {2:.3f}s" + }, + "warning":{ + "text": "HTTP {0} response from {1} in {2:.3f}s ({3})" + }, + "critical": { + "text": "Connection failed to {1} ({3})" + } + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-env.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-env.xml new file mode 100644 index 0000000..2ac0bff --- /dev/null +++ 
b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-env.xml @@ -0,0 +1,104 @@ + + + + + + mapred_log_dir_prefix + /var/log/hadoop-mapreduce + Mapreduce Log Dir Prefix + Mapreduce Log Dir Prefix + + directory + false + + + + + mapred_pid_dir_prefix + /var/run/hadoop-mapreduce + Mapreduce PID Dir Prefix + Mapreduce PID Dir Prefix + + directory + false + true + + + + + mapred_user + Mapreduce User + mapred + USER + Mapreduce User + + user + false + + + + + jobhistory_heapsize + History Server heap size + 900 + Value for JobHistoryServer heap_size variable in hadoop-env.sh + + MB + int + + + + + mapred_user_nofile_limit + 32768 + Max open files limit setting for MAPREDUCE user. + + + + mapred_user_nproc_limit + 65536 + Max number of processes limit setting for MAPREDUCE user. + + + + content + mapred-env template + This is the jinja template for mapred-env.sh file + + # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ + + export HADOOP_JOB_HISTORYSERVER_HEAPSIZE={{jobhistory_heapsize}} + + export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA + + #export HADOOP_JOB_HISTORYSERVER_OPTS= + #export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default. + #export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. + #export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. + #export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default + #export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0. + + + content + + + + http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-logsearch-conf.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-logsearch-conf.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-logsearch-conf.xml new file mode 100644 index 0000000..3c0abbf --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-logsearch-conf.xml @@ -0,0 +1,80 @@ + + + + + + service_name + Service name + Service name for Logsearch Portal (label) + MapReduce + + + + component_mappings + Component mapping + Logsearch component logid mapping list (e.g.: COMPONENT1:logid1,logid2;COMPONENT2:logid3) + HISTORYSERVER:mapred_historyserver + + + + content + Logfeeder Config + Metadata jinja template for Logfeeder which contains grok patterns for reading service specific logs. 
+ +{ + "input":[ + { + "type":"mapred_historyserver", + "rowtype":"service", + "path":"{{default('/configurations/mapred-env/mapred_log_dir_prefix', '/var/log/hadoop')}}/{{default('configurations/mapred-env/mapred_user', 'mapred')}}/mapred-{{default('configurations/mapred-env/mapred_user', 'mapred')}}-historyserver*.log" + } + ], + "filter":[ + { + "filter":"grok", + "conditions":{ + "fields":{ + "type":[ + "mapred_historyserver" + ] + } + }, + "log4j_format":"%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n", + "multiline_pattern":"^(%{TIMESTAMP_ISO8601:logtime})", + "message_pattern":"(?m)^%{TIMESTAMP_ISO8601:logtime}%{SPACE}%{LOGLEVEL:level}%{SPACE}%{JAVACLASS:logger_name}%{SPACE}\\(%{JAVAFILE:file}:%{JAVAMETHOD:method}\\(%{INT:line_number}\\)\\)%{SPACE}-%{SPACE}%{GREEDYDATA:log_message}", + "post_map_values":{ + "logtime":{ + "map_date":{ + "target_date_pattern":"yyyy-MM-dd HH:mm:ss,SSS" + } + } + } + } + ] + } + + + content + false + + + + http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-site.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-site.xml new file mode 100644 index 0000000..e51107a --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration-mapred/mapred-site.xml @@ -0,0 +1,540 @@ + + + + + + + + mapreduce.task.io.sort.mb + 358 + + The total amount of buffer memory to use while sorting files, in megabytes. + By default, gives each merge stream 1MB, which should minimize seeks. + + Sort Allocation Memory + + int + 0 + 2047 + MB + 1 + + + + mapred-site + mapreduce.map.memory.mb + + + + + + mapreduce.map.sort.spill.percent + 0.7 + + The soft limit in the serialization buffer. Once reached, a thread will + begin to spill the contents to disk in the background. Note that + collection will not block if this threshold is exceeded while a spill + is already in progress, so spills may be larger than this threshold when + it is set to less than .5 + + + + + mapreduce.task.io.sort.factor + 100 + + The number of streams to merge at once while sorting files. + This determines the number of open file handles. + + + + + + mapreduce.cluster.administrators + hadoop + + Administrators for MapReduce applications. + + + + + mapreduce.reduce.shuffle.parallelcopies + 30 + + The default number of parallel transfers run by reduce during + the copy(shuffle) phase. + + + + + mapreduce.map.speculative + false + + If true, then multiple instances of some map tasks + may be executed in parallel. + + + + + mapreduce.reduce.speculative + false + + If true, then multiple instances of some reduce tasks may be + executed in parallel. + + + + + mapreduce.job.reduce.slowstart.completedmaps + 0.05 + + Fraction of the number of maps in the job which should be complete before + reduces are scheduled for the job. + + + + + mapreduce.job.counters.max + 130 + + Limit on the number of counters allowed per job. + + + + + mapreduce.reduce.shuffle.merge.percent + 0.66 + + The usage threshold at which an in-memory merge will be + initiated, expressed as a percentage of the total memory allocated to + storing in-memory map outputs, as defined by + mapreduce.reduce.shuffle.input.buffer.percent. 
+ + + + + mapreduce.reduce.shuffle.input.buffer.percent + 0.7 + + The percentage of memory to be allocated from the maximum heap + size to storing map outputs during the shuffle. + + + + + mapreduce.output.fileoutputformat.compress.type + BLOCK + + If the job outputs are to compressed as SequenceFiles, how should + they be compressed? Should be one of NONE, RECORD or BLOCK. + + + + + mapreduce.reduce.input.buffer.percent + 0.0 + + The percentage of memory- relative to the maximum heap size- to + retain map outputs during the reduce. When the shuffle is concluded, any + remaining map outputs in memory must consume less than this threshold before + the reduce can begin. + + + + + + mapreduce.map.output.compress + false + + Should the outputs of the maps be compressed before being sent across the network. Uses SequenceFile compression. + + + + + mapreduce.task.timeout + 300000 + + The number of milliseconds before a task will be + terminated if it neither reads an input, writes an output, nor + updates its status string. + + + + + mapreduce.map.memory.mb + 512 + Virtual memory for single Map task + Map Memory + + int + 512 + 5120 + MB + 256 + + + + yarn-site + yarn.scheduler.maximum-allocation-mb + + + yarn-site + yarn.scheduler.minimum-allocation-mb + + + + + + mapreduce.reduce.memory.mb + 1024 + Virtual memory for single Reduce task + Reduce Memory + + int + 512 + 5120 + MB + 256 + + + + yarn-site + yarn.scheduler.maximum-allocation-mb + + + yarn-site + yarn.scheduler.minimum-allocation-mb + + + + + + mapreduce.shuffle.port + 13562 + + Default port that the ShuffleHandler will run on. + ShuffleHandler is a service run at the NodeManager to facilitate + transfers of intermediate Map outputs to requesting Reducers. + + + + + mapreduce.jobhistory.intermediate-done-dir + /mr-history/tmp + + Directory where history files are written by MapReduce jobs. + + + + + mapreduce.jobhistory.done-dir + /mr-history/done + + Directory where history files are managed by the MR JobHistory Server. + + NOT_MANAGED_HDFS_PATH + + + + mapreduce.jobhistory.address + localhost:10020 + Enter your JobHistoryServer hostname. + + + + mapreduce.jobhistory.webapp.address + localhost:19888 + Enter your JobHistoryServer hostname. + + + + mapreduce.framework.name + yarn + + The runtime framework for executing MapReduce jobs. Can be one of local, + classic or yarn. + + + + + yarn.app.mapreduce.am.staging-dir + /user + + The staging dir used while submitting jobs. + + + + + yarn.app.mapreduce.am.resource.mb + 512 + The amount of memory the MR AppMaster needs. + AppMaster Memory + + int + 512 + 5120 + MB + 256 + + + + yarn-site + yarn.scheduler.maximum-allocation-mb + + + yarn-site + yarn.scheduler.minimum-allocation-mb + + + + + + yarn.app.mapreduce.am.command-opts + -Xmx410m + + Java opts for the MR App Master processes. + The following symbol, if present, will be interpolated: @taskid@ is replaced + by current TaskID. Any other occurrences of '@' will go unchanged. + For example, to enable verbose gc logging to a file named for the taskid in + /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: + -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc + + Usage of -Djava.library.path can cause programs to no longer function if + hadoop native libraries are used. These values should instead be set as part + of LD_LIBRARY_PATH in the map / reduce JVM env using the mapreduce.map.env and + mapreduce.reduce.env config settings. 
+ + MR AppMaster Java Heap Size + + + mapred-site + yarn.app.mapreduce.am.resource.mb + + + + + + yarn.app.mapreduce.am.admin-command-opts + -server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN + + Java opts for the MR App Master processes for admin purposes. + It will appears before the opts set by yarn.app.mapreduce.am.command-opts and + thus its options can be overridden user. + + Usage of -Djava.library.path can cause programs to no longer function if + hadoop native libraries are used. These values should instead be set as part + of LD_LIBRARY_PATH in the map / reduce JVM env using the mapreduce.map.env and + mapreduce.reduce.env config settings. + + MR AppMaster Java Heap Size + + + mapred-site + yarn.app.mapreduce.am.resource.mb + + + + + + yarn.app.mapreduce.am.log.level + INFO + MR App Master process log level. + + + + mapreduce.admin.map.child.java.opts + -server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN + This property stores Java options for map tasks. + + + + mapreduce.admin.reduce.child.java.opts + -server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN + This property stores Java options for reduce tasks. + + + + mapreduce.application.classpath + $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + CLASSPATH for MR applications. A comma-separated list of CLASSPATH + entries. + + + + + mapreduce.am.max-attempts + 2 + + The maximum number of application attempts. It is a + application-specific setting. It should not be larger than the global number + set by resourcemanager. Otherwise, it will be override. The default number is + set to 2, to allow at least one retry for AM. + + + + + mapreduce.map.java.opts + -Xmx410m + + Larger heap-size for child jvms of maps. + + MR Map Java Heap Size + + + mapred-site + mapreduce.map.memory.mb + + + + + + mapreduce.reduce.java.opts + -Xmx756m + + Larger heap-size for child jvms of reduces. + + MR Reduce Java Heap Size + + + mapred-site + mapreduce.reduce.memory.mb + + + + + + mapreduce.map.log.level + INFO + + The logging level for the map task. The allowed levels are: + OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL. + + + + + mapreduce.reduce.log.level + INFO + + The logging level for the reduce task. The allowed levels are: + OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL. + + + + + mapreduce.admin.user.env + LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native:/usr/lib/hadoop/lib/native/Linux-amd64-64 + + Additional execution environment entries for map and reduce task processes. + This is not an additive property. You must preserve the original value if + you want your map and reduce tasks to have access to native libraries (compression, etc) + + + + + mapreduce.output.fileoutputformat.compress + false + + Should the job outputs be compressed? + + + + + mapreduce.jobhistory.http.policy + HTTP_ONLY + + This configures the HTTP endpoint for JobHistoryServer web UI. + The following values are supported: - HTTP_ONLY : Service is provided only + on http - HTTPS_ONLY : Service is provided only on https + + + + + mapreduce.job.queuename + default + + Queue to which a job is submitted. 
+ + + + capacity-scheduler + yarn.scheduler.capacity.root.queues + + + + + + + + mapreduce.reduce.shuffle.fetch.retry.enabled + 1 + + + + + mapreduce.reduce.shuffle.fetch.retry.interval-ms + 1000 + + + + + mapreduce.reduce.shuffle.fetch.retry.timeout-ms + 30000 + + + + + mapreduce.job.emit-timeline-data + false + + + + + mapreduce.jobhistory.bind-host + 0.0.0.0 + + + + + + + mapreduce.jobhistory.recovery.enable + true + Enable the history server to store server state and recover + server state upon startup. If enabled then + mapreduce.jobhistory.recovery.store.class must be specified. + + + + + mapreduce.jobhistory.recovery.store.class + org.apache.hadoop.mapreduce.v2.hs.HistoryServerLeveldbStateStoreService + The HistoryServerStateStoreService class to store history server + state for recovery. + + + + + mapreduce.jobhistory.recovery.store.leveldb.path + /hadoop/mapreduce/jhs + The URI where history server state will be stored if HistoryServerLeveldbSystemStateStoreService + is configured as the recovery storage class. + + + + http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/capacity-scheduler.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/capacity-scheduler.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/capacity-scheduler.xml new file mode 100644 index 0000000..320a629 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/capacity-scheduler.xml @@ -0,0 +1,183 @@ + + + + + yarn.scheduler.capacity.maximum-applications + 10000 + + Maximum number of applications that can be pending and running. + + + + + yarn.scheduler.capacity.maximum-am-resource-percent + 0.2 + + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + + + + + yarn.scheduler.capacity.root.queues + default + + The queues at the this level (root is the root queue). + + + + + yarn.scheduler.capacity.root.capacity + 100 + + The total capacity as a percentage out of 100 for this queue. + If it has child queues then this includes their capacity as well. + The child queues capacity should add up to their parent queue's capacity + or less. + + + + + yarn.scheduler.capacity.root.default.capacity + 100 + Default queue target capacity. + + + + yarn.scheduler.capacity.root.default.user-limit-factor + 1 + + Default queue user limit a percentage from 0.0 to 1.0. + + + + + yarn.scheduler.capacity.root.default.maximum-capacity + 100 + + The maximum capacity of the default queue. + + + + + yarn.scheduler.capacity.root.default.state + RUNNING + + The state of the default queue. State can be one of RUNNING or STOPPED. + + + + + yarn.scheduler.capacity.root.default.acl_submit_applications + * + + The ACL of who can submit jobs to the default queue. + + + + + yarn.scheduler.capacity.root.default.acl_administer_jobs + * + + The ACL of who can administer jobs on the default queue. + + + + + yarn.scheduler.capacity.root.acl_administer_queue + * + + The ACL for who can administer this queue i.e. change sub-queue + allocations. + + + + + yarn.scheduler.capacity.node-locality-delay + 40 + + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. 
+ Typically this should be set to number of nodes in the cluster, By default is setting + approximately number of nodes in one rack which is 40. + + + + + yarn.scheduler.capacity.default.minimum-user-limit-percent + 100 + + Default minimum queue resource limit depends on the number of users who have submitted applications. + + + + + + + yarn.scheduler.capacity.resource-calculator + + The ResourceCalculator implementation to be used to compare Resources in the scheduler. + The default i.e. org.apache.hadoop.yarn.util.resource.DefaultResourseCalculator only uses + Memory while DominantResourceCalculator uses Dominant-resource to compare multi-dimensional + resources such as Memory, CPU etc. A Java ResourceCalculator class name is expected. + + org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator + CPU Scheduling + + value-list + + + org.apache.hadoop.yarn.util.resource.DominantResourceCalculator + + + + org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator + + + + 1 + + + + + yarn.scheduler.capacity.root.accessible-node-labels + * + + + + + + + + + capacity-scheduler + Enter key=value (one per line) for all properties of capacity-scheduler.xml + + + hive-interactive-env + enable_hive_interactive + + + hive-interactive-env + llap_queue_capacity + + + + + http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-audit.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-audit.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-audit.xml new file mode 100644 index 0000000..a6b1baa --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-audit.xml @@ -0,0 +1,177 @@ + + + + + xasecure.audit.is.enabled + true + Is Audit enabled? + + + + xasecure.audit.destination.db + false + Audit to DB + Is Audit to DB enabled? + + boolean + + + + ranger-env + xasecure.audit.destination.db + + + + + + xasecure.audit.destination.db.jdbc.url + {{audit_jdbc_url}} + Audit DB JDBC URL + + + + xasecure.audit.destination.db.user + {{xa_audit_db_user}} + Audit DB JDBC User + + + + xasecure.audit.destination.db.password + crypted + PASSWORD + Audit DB JDBC Password + + password + + + + + xasecure.audit.destination.db.jdbc.driver + {{jdbc_driver}} + Audit DB JDBC Driver + + + + xasecure.audit.credential.provider.file + jceks://file{{credential_file}} + Credential file store + + + + xasecure.audit.destination.db.batch.filespool.dir + /var/log/hadoop/yarn/audit/db/spool + /var/log/hadoop/yarn/audit/db/spool + + + + xasecure.audit.destination.hdfs + true + Audit to HDFS + Is Audit to HDFS enabled? + + boolean + + + + ranger-env + xasecure.audit.destination.hdfs + + + + + + xasecure.audit.destination.hdfs.dir + hdfs://NAMENODE_HOSTNAME:8020/ranger/audit + HDFS folder to write audit to, make sure the service user has requried permissions + + + ranger-env + xasecure.audit.destination.hdfs.dir + + + + + + xasecure.audit.destination.hdfs.batch.filespool.dir + /var/log/hadoop/yarn/audit/hdfs/spool + /var/log/hadoop/yarn/audit/hdfs/spool + + + + xasecure.audit.destination.solr + false + Audit to SOLR + Is Solr audit enabled? 
+ + boolean + + + + ranger-env + xasecure.audit.destination.solr + + + + + + xasecure.audit.destination.solr.urls + + Solr URL + + true + + + + ranger-admin-site + ranger.audit.solr.urls + + + + + + xasecure.audit.destination.solr.zookeepers + NONE + Solr Zookeeper string + + + ranger-admin-site + ranger.audit.solr.zookeepers + + + + + + xasecure.audit.destination.solr.batch.filespool.dir + /var/log/hadoop/yarn/audit/solr/spool + /var/log/hadoop/yarn/audit/solr/spool + + + + xasecure.audit.provider.summary.enabled + false + Audit provider summary enabled + Enable Summary audit? + + boolean + + + + http://git-wip-us.apache.org/repos/asf/ambari/blob/3bf5d32d/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-plugin-properties.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-plugin-properties.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-plugin-properties.xml new file mode 100644 index 0000000..97867cc --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0/configuration/ranger-yarn-plugin-properties.xml @@ -0,0 +1,82 @@ + + + + + policy_user + ambari-qa + Policy user for YARN + This user must be system user and also present at Ranger admin portal + + + + hadoop.rpc.protection + + Used for repository creation on ranger admin + + true + + + + + common.name.for.certificate + + Common name for certificate, this value should match what is specified in repo within ranger admin + + true + + + + + ranger-yarn-plugin-enabled + No + Enable Ranger for YARN + Enable ranger yarn plugin ? + + + ranger-env + ranger-yarn-plugin-enabled + + + + boolean + false + + + + + REPOSITORY_CONFIG_USERNAME + yarn + Ranger repository config user + Used for repository creation on ranger admin + + + + REPOSITORY_CONFIG_PASSWORD + yarn + Ranger repository config password + PASSWORD + Used for repository creation on ranger admin + + password + + + +
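
Side note on the widget definitions added in YARN_widgets.json above: each widget derives its displayed number from a small arithmetic template over metric names (the "${...}" strings in the "values" entries). The following minimal Python sketch only illustrates how such a template resolves to a number once the metric samples have been fetched; it is not Ambari's actual widget engine, evaluate_widget_value is a hypothetical helper, and the sample metric values below are made up.

    # Minimal sketch (not Ambari's widget engine): evaluate a widget "value"
    # template such as the Memory Utilization expression, given metric samples
    # that have already been fetched from the metrics system.
    def evaluate_widget_value(template, samples):
        """Substitute metric names inside ${...} with numbers and evaluate.

        template -- a widget value string from YARN_widgets.json
        samples  -- dict mapping metric name -> latest numeric value
        """
        expr = template.strip()
        if expr.startswith("${") and expr.endswith("}"):
            expr = expr[2:-1]
        # Replace longer metric names first so a name that is a prefix of
        # another name is not substituted prematurely.
        for name in sorted(samples, key=len, reverse=True):
            expr = expr.replace(name, repr(float(samples[name])))
        # What remains is plain arithmetic; eval is acceptable for a sketch.
        return eval(expr, {"__builtins__": {}}, {})

    if __name__ == "__main__":
        samples = {
            "yarn.QueueMetrics.Queue=root.AllocatedMB": 6144.0,   # made-up sample
            "yarn.QueueMetrics.Queue=root.AvailableMB": 2048.0,   # made-up sample
        }
        template = ("${(yarn.QueueMetrics.Queue=root.AllocatedMB / "
                    "(yarn.QueueMetrics.Queue=root.AllocatedMB + "
                    "yarn.QueueMetrics.Queue=root.AvailableMB)) * 100}")
        print("Memory Utilization: %.1f%%" % evaluate_widget_value(template, samples))
        # -> Memory Utilization: 75.0%

With these assumed samples, 6144 / (6144 + 2048) * 100 = 75.0, which is the percentage the "Memory Utilization" graph would plot for that point in time.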