Return-Path: Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: (qmail 54297 invoked from network); 18 Sep 2009 06:44:00 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 18 Sep 2009 06:44:00 -0000 Received: (qmail 27077 invoked by uid 500); 18 Sep 2009 06:44:00 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 27038 invoked by uid 500); 18 Sep 2009 06:44:00 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 27028 invoked by uid 99); 18 Sep 2009 06:44:00 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 18 Sep 2009 06:44:00 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 18 Sep 2009 06:43:51 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 90C1E2388893; Fri, 18 Sep 2009 06:43:31 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r816487 - in /hadoop/mapreduce/trunk: CHANGES.txt src/tools/org/apache/hadoop/tools/DistCp.java Date: Fri, 18 Sep 2009 06:43:31 -0000 To: mapreduce-commits@hadoop.apache.org From: cdouglas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090918064331.90C1E2388893@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cdouglas Date: Fri Sep 18 06:43:31 2009 New Revision: 816487 URL: http://svn.apache.org/viewvc?rev=816487&view=rev Log: MAPREDUCE-654. Add a -dryrun option to distcp printing a summary of the file data to be copied, without actually performing the copy. Contributed by Ravi Gummadi Modified: hadoop/mapreduce/trunk/CHANGES.txt hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Modified: hadoop/mapreduce/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=816487&r1=816486&r2=816487&view=diff ============================================================================== --- hadoop/mapreduce/trunk/CHANGES.txt (original) +++ hadoop/mapreduce/trunk/CHANGES.txt Fri Sep 18 06:43:31 2009 @@ -365,6 +365,10 @@ checksums in distcp. Also adds an intra-task retry mechanism for errors detected during the copy. (Ravi Gummadi via cdouglas) + MAPREDUCE-654. Add a -dryrun option to distcp printing a summary of the + file data to be copied, without actually performing the copy. (Ravi Gummadi + via cdouglas) + BUG FIXES MAPREDUCE-878. Rename fair scheduler design doc to Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=816487&r1=816486&r2=816487&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original) +++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Fri Sep 18 06:43:31 2009 @@ -101,6 +101,9 @@ "\n-filelimit Limit the total number of files to be <= n" + "\n-sizelimit Limit the total size to be <= n bytes" + "\n-delete Delete the files existing in the dst but not in src" + + "\n-dryrun Display count of files and total size of files" + + "\n in src and then exit. Copy is not done at all." + + "\n desturl should not be speicified with out -update." + "\n-mapredSslConf Filename of SSL configuration for mapper task" + "\n\nNOTE 1: if -overwrite or -update are set, each source URI is " + @@ -431,11 +434,15 @@ FSDataOutputStream out = null; long bytesCopied = 0L; try { + Path srcPath = srcstat.getPath(); // open src file - in = srcstat.getPath().getFileSystem(job).open(srcstat.getPath()); + in = srcPath.getFileSystem(job).open(srcPath); reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen()); // open tmp file out = create(tmpfile, reporter, srcstat); + LOG.info("Copying file " + srcPath + " of size " + + srcstat.getLen() + " bytes..."); + // copy file for(int bytesRead; (bytesRead = in.read(buffer)) >= 0; ) { out.write(buffer, 0, bytesRead); @@ -714,7 +721,7 @@ final Path dst = new Path(destPath); copy(conf, new Arguments(tmp, null, dst, logPath, flags, null, - Long.MAX_VALUE, Long.MAX_VALUE, null)); + Long.MAX_VALUE, Long.MAX_VALUE, null, false)); } /** Sanity check for srcPath */ @@ -748,7 +755,9 @@ static void copy(final Configuration conf, final Arguments args ) throws IOException { LOG.info("srcPaths=" + args.srcs); - LOG.info("destPath=" + args.dst); + if (!args.dryrun || args.flags.contains(Options.UPDATE)) { + LOG.info("destPath=" + args.dst); + } checkSrcPath(conf, args.srcs); JobConf job = createJobConf(conf); @@ -764,10 +773,14 @@ if (setup(conf, job, args)) { JobClient.runJob(job); } - finalize(conf, job, args.dst, args.preservedAttributes); + if(!args.dryrun) { + finalize(conf, job, args.dst, args.preservedAttributes); + } } finally { - //delete tmp - fullyDelete(job.get(TMP_DIR_LABEL), job); + if (!args.dryrun) { + //delete tmp + fullyDelete(job.get(TMP_DIR_LABEL), job); + } //delete jobDirectory fullyDelete(job.get(JOB_DIR_LABEL), job); } @@ -838,6 +851,7 @@ final long filelimit; final long sizelimit; final String mapredSslConf; + final boolean dryrun; /** * Arguments for distcp @@ -852,7 +866,8 @@ */ Arguments(List srcs, Path basedir, Path dst, Path log, EnumSet flags, String preservedAttributes, - long filelimit, long sizelimit, String mapredSslConf) { + long filelimit, long sizelimit, String mapredSslConf, + boolean dryrun) { this.srcs = srcs; this.basedir = basedir; this.dst = dst; @@ -862,6 +877,7 @@ this.filelimit = filelimit; this.sizelimit = sizelimit; this.mapredSslConf = mapredSslConf; + this.dryrun = dryrun; if (LOG.isTraceEnabled()) { LOG.trace("this = " + this); @@ -879,6 +895,7 @@ String mapredSslConf = null; long filelimit = Long.MAX_VALUE; long sizelimit = Long.MAX_VALUE; + boolean dryrun = false; for (int idx = 0; idx < args.length; idx++) { Options[] opt = Options.values(); @@ -917,6 +934,9 @@ throw new IllegalArgumentException("ssl conf file not specified in -mapredSslConf"); } mapredSslConf = args[idx]; + } else if ("-dryrun".equals(args[idx])) { + dryrun = true; + dst = new Path("/tmp/distcp_dummy_dest");//dummy destination } else if ("-m".equals(args[idx])) { if (++idx == args.length) { throw new IllegalArgumentException("num_maps not specified in -m"); @@ -929,7 +949,8 @@ } } else if ('-' == args[idx].codePointAt(0)) { throw new IllegalArgumentException("Invalid switch " + args[idx]); - } else if (idx == args.length -1) { + } else if (idx == args.length -1 && + (!dryrun || flags.contains(Options.UPDATE))) { dst = new Path(args[idx]); } else { srcs.add(new Path(args[idx])); @@ -953,7 +974,7 @@ + Options.UPDATE + "."); } return new Arguments(srcs, basedir, dst, log, flags, presevedAttributes, - filelimit, sizelimit, mapredSslConf); + filelimit, sizelimit, mapredSslConf, dryrun); } /** {@inheritDoc} */ @@ -1150,7 +1171,8 @@ //set boolean values final boolean update = args.flags.contains(Options.UPDATE); - final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE); + final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE) + && !args.dryrun; jobConf.setBoolean(Options.UPDATE.propertyname, update); jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite); jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname, @@ -1218,7 +1240,8 @@ final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite; int srcCount = 0, cnsyncf = 0, dirsyn = 0; - long fileCount = 0L, dirCount = 0L, byteCount = 0L, cbsyncs = 0L; + long fileCount = 0L, dirCount = 0L, byteCount = 0L, cbsyncs = 0L, + skipFileCount = 0L, skipByteCount = 0L; Path basedir = null; HashSet parentDirsToCopy = new HashSet(); @@ -1327,6 +1350,13 @@ cbsyncs = 0L; } } + else { + ++skipFileCount; + skipByteCount += child.getLen(); + if (LOG.isTraceEnabled()) { + LOG.trace("skipping file " + child.getPath()); + } + } } if (!skipPath) { @@ -1353,7 +1383,17 @@ checkAndClose(dst_writer); checkAndClose(dir_writer); } - + LOG.info("sourcePathsCount(files+directories)=" + srcCount); + LOG.info("filesToCopyCount=" + fileCount); + LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount)); + if (update) { + LOG.info("filesToSkipCopyCount=" + skipFileCount); + LOG.info("bytesToSkipCopyCount=" + + StringUtils.humanReadableInt(skipByteCount)); + } + if (args.dryrun) { + return false; + } int mapCount = setMapCount(byteCount, jobConf); // Increase the replication of _distcp_src_files, if needed setReplication(conf, jobConf, srcfilelist, mapCount);