Return-Path: Delivered-To: apmail-hadoop-core-commits-archive@www.apache.org Received: (qmail 22515 invoked from network); 4 Mar 2008 23:15:13 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 4 Mar 2008 23:15:13 -0000 Received: (qmail 98329 invoked by uid 500); 4 Mar 2008 23:15:09 -0000 Delivered-To: apmail-hadoop-core-commits-archive@hadoop.apache.org Received: (qmail 98207 invoked by uid 500); 4 Mar 2008 23:15:09 -0000 Mailing-List: contact core-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: core-dev@hadoop.apache.org Delivered-To: mailing list core-commits@hadoop.apache.org Received: (qmail 98198 invoked by uid 99); 4 Mar 2008 23:15:09 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 04 Mar 2008 15:15:09 -0800 X-ASF-Spam-Status: No, hits=-1998.5 required=10.0 tests=ALL_TRUSTED,WEIRD_PORT X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 04 Mar 2008 23:14:32 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 0DD6A1A9842; Tue, 4 Mar 2008 15:14:43 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r633701 [3/3] - in /hadoop/core/branches/branch-0.16: ./ docs/ src/contrib/hod/bin/ src/contrib/hod/hodlib/Common/ src/contrib/hod/hodlib/Hod/ src/docs/src/documentation/content/xdocs/ Date: Tue, 04 Mar 2008 23:14:39 -0000 To: core-commits@hadoop.apache.org From: nigel@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080304231443.0DD6A1A9842@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod?rev=633701&r1=633700&r2=633701&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod Tue Mar 4 15:14:28 2008 @@ -51,6 +51,7 @@ get_exception_error_string, hodInterrupt, \ HOD_INTERRUPTED_MESG, HOD_INTERRUPTED_CODE from hodlib.Common.tcp import tcpError, tcpSocket +from hodlib.Hod.hod import hodHelp filter_warnings() @@ -80,7 +81,12 @@ # defList = { 'hod' : ( ('original-dir', 'directory', 'hod original start directory', - False, None, True, True, 'r'), + False, None, True, True, 'r'), + + ('clusterdir', 'directory', + 'Directory where cluster state information and hadoop-site.xml' + + ' will be stored.', + True, None, False, True, 'd'), ('syslog-address', 'address', 'Syslog address.', False, None, False, True, 'y'), @@ -92,15 +98,14 @@ True, 3, True, True, 'b'), ('stream', 'bool', 'Output to stderr.', - False, True, False, True, 's'), + False, True, False, True), - ('min-nodes', 'pos_int', - 'Minimum number of nodes to allocate at startup. ' + \ - 'Used with hod.script option', - True, None, False, True, 'm'), + ('nodecount', 'pos_int', + 'Number of nodes to allocate at startup. ', + True, None, False, True, 'n'), ('script', 'file', 'Hadoop script to execute.', - True, None, False, True, 'z'), + True, None, False, False, 's'), ('userid', 'user_account', 'User ID the hod shell is running under.', @@ -109,11 +114,11 @@ ('allocate-wait-time', 'pos_int', 'Time to wait for cluster allocation.', False, 300, True, True, 'e'), - - ('operation', 'string', - 'Initiate a hod operation. (help, allocate, deallocate ...)', - True, None, False, True, 'o'), + ('operation', 'string', + 'Initiate a hod operation. (help, allocate, deallocate ...)', + False, None, False, True, 'o'), + ('cluster-factor', 'pos_float', 'The number of grid slots per machines', False, 1.9, False, True, 'x'), @@ -144,7 +149,7 @@ True, "HOD", False, True, 'N'), ('walltime', 'pos_int', 'Walltime in seconds for the current HOD allocation', - True, None, False, True), + True, None, False, True, 'l'), ('script-wait-time', 'pos_int', 'Specifies the time to wait before running the script. Used with the hod.script option.', True, 10, False, True, 'W')), @@ -361,9 +366,12 @@ try: confDef = definition() confDef.add_defs(defList, defOrder) - hodOptions = options(confDef, "./%s -c [OPTIONS]" % myName, - VERSION, withConfig=True, defaultConfig=DEFAULT_CONFIG) - + hodhelp = hodHelp() + usage = hodhelp.help() + + hodOptions = options(confDef, usage, + VERSION, withConfig=True, defaultConfig=DEFAULT_CONFIG, + name=myName ) # hodConfig is a dict like object, hodConfig[section][name] try: hodConfig = config(hodOptions['config'], configDef=confDef, @@ -384,22 +392,9 @@ sys.exit(1) ## TODO : should move the dependency verification to hodConfig.verify - if hodConfig['hod'].has_key('script') \ - and not hodConfig['hod'].has_key('min-nodes'): - printErrors(hodConfig.var_error('hod', 'min-nodes', - "hod.min-nodes must be specified when using hod.script option.")) - sys.exit(1) - - if hodConfig['hod'].has_key('min-nodes'): - if hodConfig['hod']['min-nodes'] < 3: - printErrors(hodConfig.var_error('hod', 'min-nodes', - "hod.min-nodes must be >= 3 nodes: %s." % - hodConfig['hod']['min-nodes'])) - sys.exit(1) - if hodConfig['hod'].has_key('operation') and \ hodConfig['hod'].has_key('script'): - print "Script execution and hod operations are mutually exclusive." + print "Script operation is mutually exclusive with other HOD operations" hodOptions.print_help(sys.stderr) sys.exit(1) Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/setup.py URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/setup.py?rev=633701&r1=633700&r2=633701&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/setup.py (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/setup.py Tue Mar 4 15:14:28 2008 @@ -27,6 +27,7 @@ from optparse import OptionParser, IndentedHelpFormatter, OptionGroup from util import get_perms, replace_escapes from types import typeValidator, is_valid_type, typeToString +from hodlib.Hod.hod import hodHelp reEmailAddress = re.compile("^.*@.*$") reEmailDelimit = re.compile("@") @@ -228,8 +229,8 @@ errorStrings = [] if not self._dict[section].has_key(option): self._dict[section][option] = None - errorStrings.append("%s: invalid '%s' specified in section %s: %s" % ( - errorPrefix, option, section, self._dict[section][option])) + errorStrings.append("%s: invalid '%s' specified in section %s (--%s.%s): %s" % ( + errorPrefix, option, section, section, option, self._dict[section][option])) if addData: errorStrings.append("%s: additional info: %s\n" % (errorPrefix, @@ -238,11 +239,8 @@ def var_error_suggest(self, errorStrings): if self.configFile: - errorStrings.append("See configuration file: %s" % \ - self.configFile) - - if self._options: - errorStrings.append("Configuration can be overridden by options, see -h") + errorStrings.append("Check your command line options and/or " + \ + "your configuration file %s" % self.configFile) def __get_args(self, section): def __dummyToString(type, value): @@ -603,7 +601,8 @@ class options(OptionParser, baseConfig): def __init__(self, optionDef, usage, version, originalDir=None, - withConfig=False, defaultConfig=None, defaultLocation=None): + withConfig=False, defaultConfig=None, defaultLocation=None, + name=None): """Constructs and options object. optionDef - definition object @@ -619,6 +618,7 @@ self.formatter = formatter(4, max_help_position=100, width=180, short_first=1) + self.__name = name self.__version = version self.__withConfig = withConfig self.__defaultConfig = defaultConfig @@ -671,6 +671,85 @@ (self.__parsedOptions, self.args) = self.parse_args() + # Now process the positional arguments only for the client side + if self.__name == 'hod': + + hodhelp = hodHelp() + + _operation = getattr(self.__parsedOptions,'hod.operation') + _script = getattr(self.__parsedOptions, 'hod.script') + nArgs = self.args.__len__() + if _operation: + # -o option is given + if nArgs != 0: + self.error('invalid syntax : command and operation(-o) cannot coexist') + elif nArgs == 0 and _script: + # for a script option, without subcommand: hod -s script ... + pass + elif nArgs == 0: + print "Usage: ",hodhelp.help() + sys.exit(0) + else: + # subcommand is given + cmdstr = self.args[0] # the subcommand itself + cmdlist = hodhelp.ops + if cmdstr not in cmdlist: + print "Usage: ", hodhelp.help() + sys.exit(2) + + numNodes = None + clusterDir = None + # Check which subcommand. cmdstr = subcommand itself now. + if cmdstr == "allocate": + clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') + numNodes = getattr(self.__parsedOptions, 'hod.nodecount') + + if not clusterDir or not numNodes: + print getattr(hodhelp, "help_%s" % cmdstr)() + sys.exit(3) + + cmdstr = cmdstr + ' ' + clusterDir + ' ' + numNodes + + setattr(self.__parsedOptions,'hod.operation', cmdstr) + + elif cmdstr == "deallocate" or cmdstr == "info": + clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') + + if not clusterDir: + print getattr(hodhelp, "help_%s" % cmdstr)() + sys.exit(3) + + cmdstr = cmdstr + ' ' + clusterDir + setattr(self.__parsedOptions,'hod.operation', cmdstr) + + elif cmdstr == "list": + setattr(self.__parsedOptions,'hod.operation', cmdstr) + pass + + elif cmdstr == "script": + clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') + numNodes = getattr(self.__parsedOptions, 'hod.nodecount') + + if not _script or not clusterDir or not numNodes: + print getattr(hodhelp, "help_%s" % cmdstr)() + sys.exit(3) + pass + + elif cmdstr == "help": + if nArgs == 1: + self.print_help() + sys.exit(0) + elif nArgs != 2: + self.print_help() + sys.exit(3) + elif self.args[1] == 'options': + self.print_options() + sys.exit(0) + cmdstr = cmdstr + ' ' + self.args[1] + setattr(self.__parsedOptions,'hod.operation', cmdstr) + + # end of processing for arguments on the client side + if self.__withConfig: self.config = self.__parsedOptions.config if not self.config: @@ -925,6 +1004,12 @@ self.__set_display_groups() OptionParser.print_help(self, file) self.__unset_display_groups() + + def print_options(self): + _usage = self.usage + self.set_usage('') + self.print_help() + self.set_usage(_usage) def verify(self): return baseConfig.verify(self) Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hod.py URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hod.py?rev=633701&r1=633700&r2=633701&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hod.py (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hod.py Tue Mar 4 15:14:28 2008 @@ -88,8 +88,8 @@ class hodRunner: def __init__(self, cfg): - self.__ops = [ 'prepare', 'allocate', 'deallocate', - 'list', 'info', 'help' ] + self.__hodhelp = hodHelp() + self.__ops = self.__hodhelp.ops self.__cfg = cfg self.__npd = self.__cfg['nodepooldesc'] self.__opCode = 0 @@ -185,80 +185,94 @@ argLength = len(args) min = 0 max = 0 + errorFlag = False + errorMsgs = [] + if argLength == 3: nodes = args[2] clusterDir = self.__norm_cluster_dir(args[1]) - if os.path.isdir(clusterDir): - self.__setup_cluster_logger(clusterDir) - if re.match('\d+-\d+', nodes): - (min, max) = nodes.split("-") - min = int(min) - max = int(max) - else: - try: - nodes = int(nodes) - min = nodes - max = nodes - except ValueError: - self.__log.critical( - "%s operation requires a single argument. n nodes, or n-m nodes." % - operation) - self.__opCode = 3 - else: - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - self.__opCode = self.__cluster.check_cluster(clusterInfo) - if self.__opCode == 0 or self.__opCode == 15: - self.__setup_service_registry() - if hodInterrupt.isSet(): - self.__cleanup() - raise HodInterruptException() - self.__log.info("Service Registry Started.") - try: - allocateStatus = self.__cluster.allocate(clusterDir, min, max) - except HodInterruptException, h: - self.__cleanup() - raise h - # Allocation has gone through. - # Don't care about interrupts any more - - if allocateStatus == 0: - self.__set_cluster_state_info(os.environ, - self.__cluster.hdfsInfo, - self.__cluster.mapredInfo, - self.__cluster.ringmasterXRS, - self.__cluster.jobId, - min, max) - self.__setup_cluster_state(clusterDir) - self.__clusterState.write(self.__cluster.jobId, - self.__clusterStateInfo) - # Do we need to check for interrupts here ?? - - self.__set_user_state_info( - { clusterDir : self.__cluster.jobId, } ) - self.__opCode = allocateStatus - elif self.__opCode == 12: - self.__log.critical("Cluster %s already allocated." % clusterDir) - elif self.__opCode == 10: - self.__log.critical("dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - elif self.__opCode == 13: - self.__log.warn("hdfs dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - elif self.__opCode == 14: - self.__log.warn("mapred dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - - if self.__opCode > 0 and self.__opCode != 15: - self.__log.critical("Cannot allocate cluster %s" % clusterDir) - - else: - self.__log.critical("Invalid cluster directory '%s' specified." % - clusterDir) + + if not os.path.isdir(clusterDir): + errorFlag = True + errorMsgs.append("Invalid cluster directory(--hod.clusterdir or -d) "+\ + "'%s' specified." % clusterDir) + if int(nodes) < 3 : + errorFlag = True + errorMsgs.append("hod.nodecount(--hod.nodecount or -n) must be >= 3."+\ + " Given nodes: %s" % nodes) + if errorFlag: + for msg in errorMsgs: + self.__log.critical(msg) self.__opCode = 3 + return + + self.__setup_cluster_logger(clusterDir) + if re.match('\d+-\d+', nodes): + (min, max) = nodes.split("-") + min = int(min) + max = int(max) + else: + try: + nodes = int(nodes) + min = nodes + max = nodes + except ValueError: + print self.__hodhelp.help_allocate() + self.__log.critical( + "%s operation requires a single argument. n nodes, or n-m nodes." % + operation) + self.__opCode = 3 + else: + self.__setup_cluster_state(clusterDir) + clusterInfo = self.__clusterState.read() + self.__opCode = self.__cluster.check_cluster(clusterInfo) + if self.__opCode == 0 or self.__opCode == 15: + self.__setup_service_registry() + if hodInterrupt.isSet(): + self.__cleanup() + raise HodInterruptException() + self.__log.info("Service Registry Started.") + try: + allocateStatus = self.__cluster.allocate(clusterDir, min, max) + except HodInterruptException, h: + self.__cleanup() + raise h + # Allocation has gone through. + # Don't care about interrupts any more + + if allocateStatus == 0: + self.__set_cluster_state_info(os.environ, + self.__cluster.hdfsInfo, + self.__cluster.mapredInfo, + self.__cluster.ringmasterXRS, + self.__cluster.jobId, + min, max) + self.__setup_cluster_state(clusterDir) + self.__clusterState.write(self.__cluster.jobId, + self.__clusterStateInfo) + # Do we need to check for interrupts here ?? + + self.__set_user_state_info( + { clusterDir : self.__cluster.jobId, } ) + self.__opCode = allocateStatus + elif self.__opCode == 12: + self.__log.critical("Cluster %s already allocated." % clusterDir) + elif self.__opCode == 10: + self.__log.critical("dead\t%s\t%s" % (clusterInfo['jobid'], + clusterDir)) + elif self.__opCode == 13: + self.__log.warn("hdfs dead\t%s\t%s" % (clusterInfo['jobid'], + clusterDir)) + elif self.__opCode == 14: + self.__log.warn("mapred dead\t%s\t%s" % (clusterInfo['jobid'], + clusterDir)) + + if self.__opCode > 0 and self.__opCode != 15: + self.__log.critical("Cannot allocate cluster %s" % clusterDir) else: + print self.__hodhelp.help_allocate() self.__log.critical("%s operation requires two arguments. " % operation - + "A cluster path and n nodes, or min-max nodes.") + + "A cluster directory and a nodecount.") self.__opCode = 3 def _is_cluster_allocated(self, clusterDir): @@ -292,6 +306,7 @@ clusterDir) self.__opCode = 3 else: + print self.__hodhelp.help_deallocate() self.__log.critical("%s operation requires one argument. " % operation + "A cluster path.") self.__opCode = 3 @@ -341,6 +356,7 @@ self.__log.critical("'%s' does not exist." % clusterDir) self.__opCode = 3 else: + print self.__hodhelp.help_info() self.__log.critical("%s operation requires one argument. " % operation + "A cluster path.") self.__opCode = 3 @@ -356,21 +372,18 @@ for var in clusterInfo['env'].keys(): self.__log.debug("%s = %s" % (var, clusterInfo['env'][var])) - - def _op_help(self, args): - print "hod operations:\n" - print " allocate - Allocates a cluster of n nodes using the specified cluster" - print " directory to store cluster state information. The Hadoop site XML" - print " is also stored in this location." - print "" - print " deallocate - Deallocates a cluster using the pecified cluster directory. This" - print " operation is also required to clean up a dead cluster." - print "" - print " list - List all clusters currently allocated by a user, along with" - print " limited status information and the cluster's job ID." - print "" - print " info - Provide detailed information on an allocated cluster." - + def _op_help(self, arg): + if arg == None or arg.__len__() != 2: + print "hod commands:\n" + for op in self.__ops: + print getattr(self.__hodhelp, "help_%s" % op)() + else: + if arg[1] not in self.__ops: + print self.__hodhelp.help_help() + self.__log.critical("Help requested for invalid operation : %s"%arg[1]) + self.__opCode = 3 + else: print getattr(self.__hodhelp, "help_%s" % arg[1])() + def operation(self): operation = self.__cfg['hod']['operation'] try: @@ -393,16 +406,37 @@ return self.__opCode def script(self): + errorFlag = False + errorMsgs = [] + script = self.__cfg['hod']['script'] - nodes = self.__cfg['hod']['min-nodes'] - isExecutable = os.access(script, os.X_OK) - if not isExecutable: - self.__log.critical('Script %s is not an executable.' % script) - return 1 - - clusterDir = "/tmp/%s.%s" % (self.__cfg['hod']['userid'], - random.randint(0, 20000)) - os.mkdir(clusterDir) + nodes = self.__cfg['hod']['nodecount'] + clusterDir = self.__cfg['hod']['clusterdir'] + + if not os.path.isfile(script): + errorFlag = True + errorMsgs.append("Invalid script file (--hod.script or -s) " + \ + "specified : %s" % script) + else: + isExecutable = os.access(script, os.X_OK) + if not isExecutable: + errorFlag = True + errorMsgs.append('Script %s is not an executable.' % \ + self.__cfg['hod']['script']) + if not os.path.isdir(self.__cfg['hod']['clusterdir']): + errorFlag = True + errorMsgs.append("Invalid cluster directory (--hod.clusterdir or -d) " +\ + "'%s' specified." % self.__cfg['hod']['clusterdir']) + if int(self.__cfg['hod']['nodecount']) < 3 : + errorFlag = True + errorMsgs.append("nodecount(--hod.nodecount or -n) must be >= 3. " + \ + "Given nodes: %s" % self.__cfg['hod']['nodecount']) + + if errorFlag: + for msg in errorMsgs: + self.__log.critical(msg) + sys.exit(3) + ret = 0 try: self._op_allocate(('allocate', clusterDir, str(nodes))) @@ -426,7 +460,6 @@ hodInterrupt.setFlag(False) if self._is_cluster_allocated(clusterDir): self._op_deallocate(('deallocate', clusterDir)) - shutil.rmtree(clusterDir, True) except HodInterruptException, h: self.__log.critical("Script failed because of an process interrupt.") self.__opCode = HOD_INTERRUPTED_CODE @@ -442,3 +475,57 @@ self.__opCode = ret return self.__opCode + +class hodHelp(): + def __init__(self): + self.ops = ['allocate', 'deallocate', 'info', 'list','script', 'help'] + + def help_allocate(self): + return \ + "Usage : hod allocate -d -n [OPTIONS]\n" + \ + "Description : Allocates a cluster of n nodes using the specified \n" + \ + " cluster directory to store cluster state \n" + \ + " information. The Hadoop site XML is also stored \n" + \ + " in this location.\n" + \ + "For all options : hod help options.\n" + + def help_deallocate(self): + return "Usage : hod deallocate -d [OPTIONS]\n" + \ + "Description : Deallocates a cluster using the specified \n" + \ + " cluster directory. This operation is also \n" + \ + " required to clean up a dead cluster.\n" + \ + "For all options : hod help options.\n" + + def help_list(self): + return "Usage : hod list [OPTIONS]\n" + \ + "Description : List all clusters currently allocated by a user, \n" + \ + " along with limited status information and the \n" + \ + " cluster ID.\n" + \ + "For all options : hod help options.\n" + + def help_info(self): + return "Usage : hod info -d [OPTIONS]\n" + \ + "Description : Provide detailed information on an allocated cluster.\n" + \ + "For all options : hod help options.\n" + + def help_script(self): + return "Usage : hod script -d -n " + \ + "-s