Return-Path: Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: (qmail 96860 invoked from network); 29 Jun 2009 18:53:39 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 29 Jun 2009 18:53:39 -0000 Received: (qmail 70676 invoked by uid 500); 29 Jun 2009 18:53:50 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 70653 invoked by uid 500); 29 Jun 2009 18:53:50 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Delivered-To: moderator for mapreduce-commits@hadoop.apache.org Received: (qmail 46502 invoked by uid 99); 29 Jun 2009 18:20:32 -0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r789402 - in /hadoop/mapreduce/trunk: CHANGES.txt src/tools/org/apache/hadoop/tools/DistCp.java Date: Mon, 29 Jun 2009 18:20:01 -0000 To: mapreduce-commits@hadoop.apache.org From: szetszwo@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090629182001.92D172388895@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: szetszwo Date: Mon Jun 29 18:20:01 2009 New Revision: 789402 URL: http://svn.apache.org/viewvc?rev=789402&view=rev Log: MAPREDUCE-646. Increase srcfilelist replication number in distcp job. 
Contributed by Ravi Gummadi Modified: hadoop/mapreduce/trunk/CHANGES.txt hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Modified: hadoop/mapreduce/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=789402&r1=789401&r2=789402&view=diff ============================================================================== --- hadoop/mapreduce/trunk/CHANGES.txt (original) +++ hadoop/mapreduce/trunk/CHANGES.txt Mon Jun 29 18:20:01 2009 @@ -38,6 +38,9 @@ MAPREDUCE-416. Moves the history file to a "done" folder whenever a job completes. (Amar Kamat via ddas) + MAPREDUCE-646. Increase srcfilelist replication number in dictcp job. + (Ravi Gummadi via szetszwo) + BUG FIXES HADOOP-4687. MapReduce is split from Hadoop Core. It is a subproject under Hadoop (Owen O'Malley) Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=789402&r1=789401&r2=789402&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original) +++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Mon Jun 29 18:20:01 2009 @@ -945,14 +945,16 @@ * @param job The job to configure * @return Count of maps to run. 
*/ - private static void setMapCount(long totalBytes, JobConf job) + private static int setMapCount(long totalBytes, JobConf job) throws IOException { int numMaps = (int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP)); numMaps = Math.min(numMaps, job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers())); - job.setNumMapTasks(Math.max(numMaps, 1)); + numMaps = Math.max(numMaps, 1); + job.setNumMapTasks(numMaps); + return numMaps; } /** Fully delete dir */ @@ -987,6 +989,28 @@ } /** + * Increase the replication factor of _distcp_src_files to + * sqrt(min(maxMapsOnCluster, numMaps)). This is to reduce the chance of + * failing of distcp because of "not having a replication of _distcp_src_files + * available for reading for some maps". + */ + private static void setReplication(Configuration conf, JobConf jobConf, + Path srcfilelist, int numMaps) throws IOException { + int numMaxMaps = new JobClient(jobConf).getClusterStatus().getMaxMapTasks(); + short replication = (short) Math.ceil( + Math.sqrt(Math.min(numMaxMaps, numMaps))); + FileSystem fs = srcfilelist.getFileSystem(conf); + FileStatus srcStatus = fs.getFileStatus(srcfilelist); + + if (srcStatus.getReplication() < replication) { + if (!fs.setReplication(srcfilelist, replication)) { + throw new IOException("Unable to increase the replication of file " + + srcfilelist); + } + } + } + + /** * Initialize DFSCopyFileMapper specific job-configuration. * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. 
@@ -1142,6 +1166,10 @@ checkAndClose(dir_writer); } + int mapCount = setMapCount(byteCount, jobConf); + // Increase the replication of _distcp_src_files, if needed + setReplication(conf, jobConf, srcfilelist, mapCount); + FileStatus dststatus = null; try { dststatus = dstfs.getFileStatus(args.dst); @@ -1173,7 +1201,7 @@ LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount)); jobConf.setInt(SRC_COUNT_LABEL, srcCount); jobConf.setLong(TOTAL_SIZE_LABEL, byteCount); - setMapCount(byteCount, jobConf); + return (fileCount + dirCount) > 0; }