Author: nigel Date: Wed Mar 5 08:36:36 2008 New Revision: 633918 URL: http://svn.apache.org/viewvc?rev=633918&view=rev Log: Merge of -r 633914:633915 from trunk to branch 0.16 to fix HADOOP-2925. Modified: hadoop/core/branches/branch-0.16/CHANGES.txt hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodcleanup hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodring hadoop/core/branches/branch-0.16/src/contrib/hod/bin/ringmaster hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/util.py hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hadoop.py hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/HodRing/hodRing.py hadoop/core/branches/branch-0.16/src/contrib/hod/support/logcondense.py hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/hod_config_guide.xml Modified: hadoop/core/branches/branch-0.16/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/CHANGES.txt?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/CHANGES.txt (original) +++ hadoop/core/branches/branch-0.16/CHANGES.txt Wed Mar 5 08:36:36 2008 @@ -113,6 +113,10 @@ datnodes flushes the block file buffered output stream before sending a positive ack for the packet back to the client. (dhruba) + HADOOP-2925. Fix HOD to create the mapred system directory using a + naming convention that will avoid clashes in multi-user shared + cluster scenario. 
(Hemanth Yamijala via nigel) + Release 0.16.0 - 2008-02-07 INCOMPATIBLE CHANGES Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hod Wed Mar 5 08:36:36 2008 @@ -346,7 +346,10 @@ False, 2, False, True), ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True)) + False, 2, False, True), + + ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', + False, '/mapredsystem', False, False)) } defOrder = [ 'hod', 'ringmaster', 'hodring', 'resource_manager', Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodcleanup URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodcleanup?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodcleanup (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodcleanup Wed Mar 5 08:36:36 2008 @@ -123,7 +123,7 @@ # END LAME HACK (head, tail) = os.path.split(archiveFile) - destFile = os.path.join(hdfsURIMatch.group(2), conf['user-id'], conf['service-id'], tail) + destFile = os.path.join(hdfsURIMatch.group(2), conf['user-id'], 'hod-logs', conf['service-id'], tail) log.info("copying archive %s to DFS %s ..." 
% (archiveFile, destFile)) Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodring URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodring?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodring (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/bin/hodring Wed Mar 5 08:36:36 2008 @@ -130,7 +130,10 @@ False, 2, False, True), ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True)) + False, 2, False, True), + + ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', + False, '/mapredsystem', False, False)) } if __name__ == '__main__': Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/bin/ringmaster URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/bin/ringmaster?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/bin/ringmaster (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/bin/ringmaster Wed Mar 5 08:36:36 2008 @@ -261,8 +261,10 @@ False, 2, False, True), ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True)) - + False, 2, False, True), + + ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', + False, '/mapredsystem', False, False)) } Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/util.py URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/util.py?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- 
hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/util.py (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Common/util.py Wed Mar 5 08:36:36 2008 @@ -250,6 +250,9 @@ dict[splits[0]] = splits[1] return dict +def getMapredSystemDirectory(mrSysDirRoot, userid, jobid): + return os.path.join(mrSysDirRoot, userid, 'mapredsystem', jobid) + class HodInterrupt: def __init__(self): self.HodInterruptFlag = False Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hadoop.py URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hadoop.py?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hadoop.py (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/Hod/hadoop.py Wed Mar 5 08:36:36 2008 @@ -57,7 +57,7 @@ return prop - def gen_site_conf(self, confDir, tempDir, numNodes, hdfsAddr,\ + def gen_site_conf(self, confDir, tempDir, numNodes, hdfsAddr, mrSysDir,\ mapredAddr=None, clientParams=None, serverParams=None,\ finalServerParams=None, clusterFactor=None): if not mapredAddr: @@ -76,9 +76,7 @@ 'hadoop.tmp.dir' : confDir, \ 'dfs.client.buffer.dir' : tempDir, } - mapredAddrSplit = mapredAddr.split(":") - mapredsystem = os.path.join('/mapredsystem', mapredAddrSplit[0]) - paramsDict['mapred.system.dir'] = mapredsystem + paramsDict['mapred.system.dir'] = mrSysDir # mapred-default.xml is no longer used now. numred = int(math.floor(clusterFactor * (int(numNodes) - 1))) @@ -515,8 +513,10 @@ os.makedirs(tempDir) tempDir = os.path.join( tempDir, self.__cfg['hod']['userid']\ + "." 
+ self.jobId ) + mrSysDir = getMapredSystemDirectory(self.__cfg['hodring']['mapred-system-dir-root'],\ + self.__cfg['hod']['userid'], self.jobId) self.__hadoopCfg.gen_site_conf(clusterDir, tempDir, min,\ - hdfsAddr, mapredAddr, clientParams,\ + hdfsAddr, mrSysDir, mapredAddr, clientParams,\ serverParams, finalServerParams,\ clusterFactor) self.__log.info("hadoop-site.xml at %s" % clusterDir) Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/HodRing/hodRing.py URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/HodRing/hodRing.py?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/HodRing/hodRing.py (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/hodlib/HodRing/hodRing.py Wed Mar 5 08:36:36 2008 @@ -25,7 +25,7 @@ from pprint import pformat from optparse import OptionParser from urlparse import urlparse -from hodlib.Common.util import local_fqdn, parseEquals +from hodlib.Common.util import local_fqdn, parseEquals, getMapredSystemDirectory from hodlib.Common.tcp import tcpSocket, tcpError binfile = sys.path[0] @@ -150,10 +150,12 @@ class HadoopCommand: """Runs a single hadoop command""" - def __init__(self, id, desc, tempdir, tardir, log, javahome, restart=False): + def __init__(self, id, desc, tempdir, tardir, log, javahome, + mrSysDir, restart=False): self.desc = desc self.log = log self.javahome = javahome + self.__mrSysDir = mrSysDir self.program = desc.getProgram() self.name = desc.getName() self.workdirs = desc.getWorkDirs() @@ -218,7 +220,7 @@ self.filledInKeyVals.append(keyvalpair) if ( v == "fillindir"): - v = os.path.join('/mapredsystem', local_fqdn()) + v = self.__mrSysDir pass prop = None @@ -509,8 +511,11 @@ id = 0 for desc in self._cfg['commanddesc']: self.log.debug(pprint.pformat(desc.dict)) + mrSysDir = 
getMapredSystemDirectory(self._cfg['mapred-system-dir-root'], + self._cfg['userid'], self._cfg['service-id']) + self.log.debug('mrsysdir is %s' % mrSysDir) cmd = HadoopCommand(id, desc, self.__tempDir, self.__pkgDir, self.log, - self._cfg['java-home'], restart) + self._cfg['java-home'], mrSysDir, restart) self.__hadoopLogDirs.append(cmd.logdir) self.log.debug("hadoop log directory: %s" % self.__hadoopLogDirs) Modified: hadoop/core/branches/branch-0.16/src/contrib/hod/support/logcondense.py URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/contrib/hod/support/logcondense.py?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/contrib/hod/support/logcondense.py (original) +++ hadoop/core/branches/branch-0.16/src/contrib/hod/support/logcondense.py Wed Mar 5 08:36:36 2008 @@ -125,7 +125,7 @@ for line in stdout: m = re.match("^(.*?)\s.*$", line) filename = m.group(1) - # file name format: ///[0-1]-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz + # file name format: //hod-logs//[0-1]-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz # first strip prefix: if filename.startswith(options.log): filename = filename.lstrip(options.log) @@ -135,7 +135,7 @@ continue # Now get other details from filename. 
- k = re.match("/(.*)/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename) + k = re.match("/(.*)/.*/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename) if k: username = k.group(1) jobid = k.group(2) @@ -159,7 +159,7 @@ if (diff.days > options.days): desttodel = filename if not toPurge.has_key(jobid): - toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/" + jobid + toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/hod-logs/" + jobid for job in toPurge.keys(): for prefix in deletedNamePrefixes: Modified: hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/hod_config_guide.xml URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/hod_config_guide.xml?rev=633918&r1=633917&r2=633918&view=diff ============================================================================== --- hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/hod_config_guide.xml (original) +++ hadoop/core/branches/branch-0.16/src/docs/src/documentation/content/xdocs/hod_config_guide.xml Wed Mar 5 08:36:36 2008 @@ -85,6 +85,10 @@ be picked for use to run an HTTP server.
  • java-home: Location of Java to be used by Hadoop.
  • +
  • syslog-address: Address to which a syslog daemon is bound. The format + of the value is host:port. If configured, HOD log messages + will be logged to syslog using this value.
  • + @@ -200,6 +204,47 @@ JobTracker and TaskTrackers
  • final-server-params: Same as above, except they will be marked final.
  • + + + +
    + 3.7 hodring options + +
      +
    • mapred-system-dir-root: Directory in the DFS under which HOD will + generate sub-directory names and pass the full path + as the value of the 'mapred.system.dir' configuration + parameter to Hadoop daemons. The format of the full + path will be value-of-this-option/userid/mapredsystem/cluster-id. + Note that, if permissions are enabled in HDFS, the directory + specified here should be one under which all users can + create directories. Setting the value of + this option to /user will make HOD use the user's + home directory to generate the mapred.system.dir value.
    • + +
    • log-destination-uri: URL describing a path in an external, static DFS or the + cluster node's local file system where HOD will upload + Hadoop logs when a cluster is deallocated. To specify a + DFS path, use the format 'hdfs://path'. To specify a + cluster node's local file path, use the format 'file://path'. + + When clusters are deallocated by HOD, the hadoop logs will + be deleted as part of HOD's cleanup process. In order to + persist these logs, you can use this configuration option. + + The format of the path is + value-of-this-option/userid/hod-logs/cluster-id + + Note that the directory you specify here must be one under which all + users can create sub-directories. Setting this value + to hdfs://user will place the logs in the user's home directory + in DFS.
    • + +
    • pkgs: Installation directory under which the bin/hadoop executable is located. This will + be used by HOD to upload logs if an HDFS URL is specified in the log-destination-uri + option. Note that this is useful if the users are using a tarball whose version + may differ from the external, static HDFS version.
    • +