Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 829E4200BCA for ; Mon, 21 Nov 2016 21:06:33 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 7D0D3160AF9; Mon, 21 Nov 2016 20:06:33 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 7A8D4160AEF for ; Mon, 21 Nov 2016 21:06:32 +0100 (CET) Received: (qmail 41307 invoked by uid 500); 21 Nov 2016 20:06:31 -0000 Mailing-List: contact commits-help@ambari.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ambari-dev@ambari.apache.org Delivered-To: mailing list commits@ambari.apache.org Received: (qmail 41298 invoked by uid 99); 21 Nov 2016 20:06:31 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 21 Nov 2016 20:06:31 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 3ABCBE0999; Mon, 21 Nov 2016 20:06:31 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: alejandro@apache.org To: commits@ambari.apache.org Message-Id: <0025f26f436d401da2aa38f8e94d1bd4@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: ambari git commit: AMBARI-18940. Perf: Fix bugs in deploy-gce-perf-cluster.py to generate correct config file (alejandro) Date: Mon, 21 Nov 2016 20:06:31 +0000 (UTC) archived-at: Mon, 21 Nov 2016 20:06:33 -0000 Repository: ambari Updated Branches: refs/heads/branch-2.5 0c5850a2e -> 52347d2e4 AMBARI-18940. Perf: Fix bugs in deploy-gce-perf-cluster.py to generate correct config file (alejandro) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/52347d2e Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/52347d2e Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/52347d2e Branch: refs/heads/branch-2.5 Commit: 52347d2e4e21a0e1dbb518077ebe36ba5f7ed0a6 Parents: 0c5850a Author: Alejandro Fernandez Authored: Mon Nov 21 12:06:58 2016 -0800 Committer: Alejandro Fernandez Committed: Mon Nov 21 12:06:58 2016 -0800 ---------------------------------------------------------------------- contrib/utils/perf/deploy-gce-perf-cluster.py | 230 +++++++++++++-------- 1 file changed, 145 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/52347d2e/contrib/utils/perf/deploy-gce-perf-cluster.py ---------------------------------------------------------------------- diff --git a/contrib/utils/perf/deploy-gce-perf-cluster.py b/contrib/utils/perf/deploy-gce-perf-cluster.py index da45f8f..95ed98f 100644 --- a/contrib/utils/perf/deploy-gce-perf-cluster.py +++ b/contrib/utils/perf/deploy-gce-perf-cluster.py @@ -27,18 +27,20 @@ import traceback import re import socket -cluster_name="perf-cluster" -ambari_repo_file_url="http://s3.amazonaws.com/dev.hortonworks.com/ambari/centos6/2.x/latest/trunk/ambaribn.repo" +cluster_prefix = "perf" +ambari_repo_file_url = "http://s3.amazonaws.com/dev.hortonworks.com/ambari/centos6/2.x/latest/trunk/ambaribn.repo" -public_hostname_script="foo" -hostname_script="foo" +public_hostname_script = "foo" +hostname_script = "foo" -start_number=1 -number_of_agents_on_host=50 +NUMBER_OF_AGENTS_ON_HOST = 50 class SSH: - """ Ssh implementation of this """ + """ + Ssh implementation of this + """ + def __init__(self, user, sshkey_file, host, command, custom_option='', errorMessage = None): self.user = user self.sshkey_file = sshkey_file @@ -46,8 +48,6 @@ class SSH: self.command = command self.errorMessage = errorMessage self.custom_option = custom_option - pass - def run(self): sshcommand = ["ssh", @@ -91,12 +91,15 @@ class SSH: print "SSH command execution finished" - return {"exitstatus": sshstat.returncode, "log": log, "errormsg": errorMsg} + return {"exitstatus": sshstat.returncode, "log": log, "errormsg": errorMsg} class SCP: - """ SCP implementation that is thread based. The status can be returned using - status val """ + """ + SCP implementation that is thread based. The status can be returned using + status val + """ + def __init__(self, user, sshkey_file, host, inputFile, remote, errorMessage = None): self.user = user self.sshkey_file = sshkey_file @@ -104,8 +107,6 @@ class SCP: self.inputFile = inputFile self.remote = remote self.errorMessage = errorMessage - pass - def run(self): scpcommand = ["scp", @@ -115,7 +116,6 @@ class SCP: "-o", "StrictHostKeyChecking=no", "-i", self.sshkey_file, self.inputFile, self.user + "@" + self.host + ":" + self.remote] - i = 1 while True: try: @@ -162,27 +162,33 @@ def main(): parser.add_argument('--key', type=str, action='store', help='Path to GCE ssh key.') - parser.add_argument('--cluster-prefix', type=str, - action='store', help='Cluster name prefix.') + parser.add_argument('--cluster-suffix', type=str, + action='store', help='Cluster name suffix.') parser.add_argument('--agent-prefix', type=str, action='store', help='Agent name prefix.') parser.add_argument('--agents-count', type=int, - action='store', help='Agents count for whole cluster(multiples of 50).') + action='store', help='Agents count for whole cluster (multiples of 50).') if len(sys.argv) <= 1: parser.print_help() - else: - args = parser.parse_args() - do_work(args) + sys.exit(-1) + + args = parser.parse_args() + do_work(args) -# base method which process cluster deployment def deploy_cluster(args): - number_of_nodes=args.agents_count/number_of_agents_on_host + """ + Process cluster deployment + :param args: Command line args. + """ + # When dividing, need to get the ceil. + number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1 + # trying to create cluster with needed params - print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(args.cluster_prefix, cluster_name, str(number_of_nodes)) - execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(args.cluster_prefix, cluster_name, str(number_of_nodes)), + print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)) + execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)), "Failed to create cluster, probably not enough resources!", "-tt") # VMs are not accessible immediately @@ -190,19 +196,20 @@ def deploy_cluster(args): # getting list of vms information like hostname and ip address print "Getting list of virtual machines from cluster..." + # Dictionary from host name to IP vms = get_vms_list(args) # check number of nodes in cluster to be the same as user asked print "Checking count of created nodes in cluster..." if not vms or len(vms) < number_of_nodes: - raise Exception("Can not bring up enough nodes. Requested {0}, but got: {1}. Probably not enough resources!".format(number_of_nodes, len(vms))) + raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(vms))) print "GCE cluster was successfully created!" pretty_print_vms(vms) # installing/starting ambari-server and ambari-agents on each host server_host_name = sorted(vms.items())[0][0] - server_installed=False + server_installed = False print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server and ambari-agent)..." create_server_script(args, server_host_name) @@ -211,14 +218,33 @@ def deploy_cluster(args): create_agent_script(args, server_host_name) time.sleep(10) + + # If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead + # of 50 on every VM. + num_agents_left_to_create = args.agents_count + + start_num = 1 for (hostname, ip) in sorted(vms.items()): + num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST) + print "==========================" - print "Working on {0}".format(hostname) + print "Working on VM {0} that will contain hosts %d - %d".format(hostname, start_num, start_num + num_agents_on_this_host - 1) + + # The agent multiplier config will be different on each VM. + + cmd_generate_multiplier_conf = "mkdir -p /etc/ambari-agent/conf/ ; printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf".format(start_num, num_agents_on_this_host, args.agent_prefix) + start_num += num_agents_on_this_host + num_agents_left_to_create -= num_agents_on_this_host + if not server_installed: remote_path = "/server.sh" local_path = "server.sh" print "Copying server.sh to {0}...".format(hostname) put_file(args, ip, local_path, remote_path, "Failed to copy file!") + + print "Generating agent-multiplier.conf" + execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname)) + print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname) execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &", "Install/configure/start server script failed!") @@ -228,6 +254,10 @@ def deploy_cluster(args): local_path = "agent.sh" print "Copying agent.sh to {0}...".format(hostname) put_file(args, ip, local_path, remote_path, "Failed to copy file!") + + print "Generating agent-multiplier.conf" + execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname)) + print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname) execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &", "Install/configure start agent script failed!") @@ -235,75 +265,90 @@ def deploy_cluster(args): print "All scripts where successfully copied and started on all hosts. " \ "\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!" -# check if all required params were passed by user -# if all needed params available then start cluster deploy + def do_work(args): + """ + Check that all required args are passed in. If so, deploy the cluster. + :param args: Command line args + """ if not args.controller: - raise Failure("GCE controller ip address not defined!") + raise Exception("GCE controller ip address is not defined!") if not args.key: - raise Failure("Path to gce ssh key not defined!") + raise Exception("Path to gce ssh key is not defined!") - if not args.cluster_prefix: - raise Failure("Cluster name prefix not defined!") + if not args.cluster_suffix: + raise Exception("Cluster name suffix is not defined!") if not args.agent_prefix: - raise Failure("Agent name prefix not defined!") + raise Exception("Agent name prefix is not defined!") if not args.agents_count: - raise Failure("Agents count for whole cluster(multiples of 50) not defined!") + raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!") deploy_cluster(args) -# creating server.sh script in the same dir where current script is located -# server.sh script will install, configure and start ambari-server and ambari-agent on host -def create_server_script(args, server_host_name): - file = open("server.sh", "w") - - file.write("#!/bin/bash\n") - - file.write("wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url)) - file.write("yum clean all; yum install git ambari-server ambari-agent -y\n") - file.write("cd /home; git clone https://github.com/apache/ambari.git\n") - - file.write("cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n") - file.write("cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n") - - file.write("ambari-server setup -s\n") - file.write("sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n") - file.write("ambari-server start --skip-database-check\n") - file.write("sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name)) - file.write("sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script)) - file.write("printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf\n".format(start_number, number_of_agents_on_host, args.agent_prefix)) - file.write("python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n") - - file.write("exit 0") +def create_server_script(args, server_host_name): + """ + Creating server.sh script in the same dir where current script is located + server.sh script will install, configure and start ambari-server and ambari-agent on host + :param args: Command line args + :param server_host_name: Server host name + """ + + contents = "#!/bin/bash\n" + \ + "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \ + "yum clean all; yum install git ambari-server ambari-agent -y\n" + \ + "cd /home; git clone https://github.com/apache/ambari.git\n" + \ + "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \ + "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \ + "ambari-server setup -s\n" + \ + "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \ + "ambari-server start --skip-database-check\n" + \ + "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \ + "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \ + "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \ + "exit 0" + + with open("server.sh", "w") as f: + f.write(contents) - file.close() -# creating agent.sh script in the same dir where current script is located -# agent.sh script will install, configure and start ambari-agent on host def create_agent_script(args, server_host_name): - file = open("agent.sh", "w") - - file.write("#!/bin/bash\n") - - file.write("wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url)) - file.write("yum clean all; yum install git ambari-agent -y\n") - file.write("cd /home; git clone https://github.com/apache/ambari.git\n") - file.write("cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n") + """ + Creating agent.sh script in the same dir where current script is located + agent.sh script will install, configure and start ambari-agent on host + :param args: Command line args + :param server_host_name: Server host name + """ + + # TODO, instead of cloning Ambari repo on each VM, do it on the server once and distribute to all of the agents. + contents = "#!/bin/bash\n" + \ + "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \ + "yum clean all; yum install git ambari-agent -y\n" + \ + "cd /home; git clone https://github.com/apache/ambari.git\n" + \ + "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \ + "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \ + "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \ + "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \ + "exit 0" + + with open("agent.sh", "w") as f: + f.write(contents) - file.write("sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name)) - file.write("sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script)) - file.write("printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf\n".format(start_number, number_of_agents_on_host, args.agent_prefix)) - file.write("python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n") - file.write("exit 0") - file.close() - -# method to execute ssh commands via SSH class def execute_command(args, ip, cmd, fail_message, custom_option='', login='root'): + """ + Method to execute ssh commands via SSH class + :param args: Command line args + :param ip: IP to ssh to + :param cmd: Command to execute + :param fail_message: In case of an error, what to report + :param custom_option: Custom flags + :param login: Login user + :return: Return execute log message + """ ssh = SSH(login, args.key, ip, cmd, custom_option, fail_message) ssh_result = ssh.run() status_code = ssh_result["exitstatus"] @@ -312,8 +357,18 @@ def execute_command(args, ip, cmd, fail_message, custom_option='', login='root') return ssh_result["log"][0] -# method to copy file from local to remote host via SCP class + def put_file(args, ip, local_file, remote_file, fail_message, login='root'): + """ + Method to copy file from local to remote host via SCP class + :param args: Command line args + :param ip: IP to ssh to + :param local_file: Path to local file + :param remote_file: Path to remote file + :param fail_message: In case of an error, what to report + :param login: Login user. + :return: Return copy log message + """ scp = SCP(login, args.key, ip, local_file, remote_file, fail_message) scp_result = scp.run() @@ -323,10 +378,15 @@ def put_file(args, ip, local_file, remote_file, fail_message, login='root'): return scp_result["log"][0] -# method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster + def get_vms_list(args): - gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format( - args.cluster_prefix, cluster_name) + """ + Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster + :param args: Command line args + :return: Mapping of VM host name to ip. + """ + + gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format(cluster_prefix, args.cluster_suffix) out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt") lines = out.split('\n') #print "LINES=" + str(lines) @@ -339,15 +399,15 @@ def get_vms_list(args): if match: result[match.group(2)] = match.group(1) else: - raise Exception('Can not parse "{0}"'.format(s)) + raise Exception('Cannot parse "{0}"'.format(s)) return result else: - raise Exception('Can not parse "{0}"'.format(lines)) + raise Exception('Cannot parse "{0}"'.format(lines)) def pretty_print_vms(vms): print "----------------------------" - print "server ip: {0}".format(sorted(vms.items())[0][1]) + print "Server IP: {0}".format(sorted(vms.items())[0][1]) print "Hostnames of nodes in cluster:" for (hostname, ip) in sorted(vms.items()): print hostname