From hadoop-commits-return-511-apmail-lucene-hadoop-commits-archive=lucene.apache.org@lucene.apache.org Tue Aug 01 20:41:54 2006 Return-Path: Delivered-To: apmail-lucene-hadoop-commits-archive@locus.apache.org Received: (qmail 42810 invoked from network); 1 Aug 2006 20:41:53 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 1 Aug 2006 20:41:53 -0000 Received: (qmail 6364 invoked by uid 500); 1 Aug 2006 20:41:42 -0000 Delivered-To: apmail-lucene-hadoop-commits-archive@lucene.apache.org Received: (qmail 6353 invoked by uid 500); 1 Aug 2006 20:41:42 -0000 Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hadoop-dev@lucene.apache.org Delivered-To: mailing list hadoop-commits@lucene.apache.org Received: (qmail 6263 invoked by uid 99); 1 Aug 2006 20:41:42 -0000 Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 01 Aug 2006 13:41:41 -0700 X-ASF-Spam-Status: No, hits=-9.4 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received-SPF: pass (asf.osuosl.org: local policy) Received: from [140.211.166.113] (HELO eris.apache.org) (140.211.166.113) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 01 Aug 2006 13:41:40 -0700 Received: by eris.apache.org (Postfix, from userid 65534) id 6169B1A981A; Tue, 1 Aug 2006 13:41:20 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r427719 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/DFSShell.java src/java/org/apache/hadoop/fs/FileUtil.java Date: Tue, 01 Aug 2006 20:41:18 -0000 To: hadoop-commits@lucene.apache.org From: cutting@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20060801204120.6169B1A981A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Author: cutting Date: Tue Aug 1 13:41:18 2006 New Revision: 427719 URL: http://svn.apache.org/viewvc?rev=427719&view=rev Log: HADOOP-369. Add -getmerge option to dfs command that appends all files in a directory into a single local file. Contributed by Johan Oskarson. Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=427719&r1=427718&r2=427719&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Tue Aug 1 13:41:18 2006 @@ -120,9 +120,13 @@ 34. HADOOP-345. Permit iteration over Configuration key/value pairs. (Michel Tourn via cutting) -35. HADOOP-409. Streaming contrib module: make Hadoop configuration +35. HADOOP-409. Streaming contrib module: make configuration properties available to commands as environment variables. (Michel Tourn via cutting) + +36. HADOOP-369. Add -getmerge option to dfs command that appends all + files in a directory into a single local file. + (Johan Oskarson via cutting) Release 0.4.0 - 2006-06-28 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java?rev=427719&r1=427718&r2=427719&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSShell.java Tue Aug 1 13:41:18 2006 @@ -59,6 +59,34 @@ void copyToLocal(String srcf, Path dst) throws IOException { fs.copyToLocalFile(new Path(srcf), dst); } + + /** + * Get all the files in the directory and output them to + * only one file on local fs + * srcf is kept. + */ + void copyMergeToLocal(String srcf, Path dst) throws IOException { + copyMergeToLocal(srcf, dst, false); + } + + + /** + * Get all the files in the directory and output them to + * only one file on local fs + * srcf is kept. + * + * Also adds a string between the files (useful for adding \n + * to a text file) + */ + void copyMergeToLocal(String srcf, Path dst, boolean endline) throws IOException { + if(endline) { + FileUtil.copyMerge(fs, new Path(srcf), + FileSystem.getNamed("local", conf), dst, false, conf, "\n"); + } else { + FileUtil.copyMerge(fs, new Path(srcf), + FileSystem.getNamed("local", conf), dst, false, conf, null); + } + } /** * Obtain the indicated DFS file and copy to the local name. @@ -300,8 +328,8 @@ " [-conf ] [-D <[property=value>]"+ " [-ls ] [-lsr ] [-du ] [-mv ] [-cp ] [-rm ]" + " [-put ] [-copyFromLocal ] [-moveFromLocal ]" + - " [-get ] [-cat ] [-copyToLocal ] [-moveToLocal ]" + - " [-mkdir ] [-report] [-setrep [-R] ]"); + " [-get ] [-getmerge [addnl]] [-cat ] [-copyToLocal ]" + + " [-moveToLocal ] [-mkdir ] [-report] [-setrep [-R] ]"); return -1; } @@ -318,6 +346,12 @@ moveFromLocal(new Path(argv[i++]), argv[i++]); } else if ("-get".equals(cmd) || "-copyToLocal".equals(cmd)) { copyToLocal(argv[i++], new Path(argv[i++])); + } else if ("-getmerge".equals(cmd)) { + if(argv.length>i+2) + copyMergeToLocal(argv[i++], new Path(argv[i++]), Boolean.parseBoolean(argv[i++])); + else + copyMergeToLocal(argv[i++], new Path(argv[i++])); + } else if ("-cat".equals(cmd)) { cat(argv[i++]); } else if ("-moveToLocal".equals(cmd)) { Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java?rev=427719&r1=427718&r2=427719&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java Tue Aug 1 13:41:18 2006 @@ -82,6 +82,45 @@ return true; } } + + /** Copy all files in a directory to one output file (merge). */ + public static boolean copyMerge(FileSystem srcFS, Path srcDir, + FileSystem dstFS, Path dstFile, + boolean deleteSource, + Configuration conf, String addString) throws IOException { + dstFile = checkDest(srcDir.getName(), dstFS, dstFile); + + if (!srcFS.isDirectory(srcDir)) + return false; + + OutputStream out = dstFS.create(dstFile); + + try { + Path contents[] = srcFS.listPaths(srcDir); + for (int i = 0; i < contents.length; i++) { + if (srcFS.isFile(contents[i])) { + InputStream in = srcFS.open(contents[i]); + try { + copyContent(in, out, conf, false); + if(addString!=null) + out.write(addString.getBytes("UTF-8")); + + } finally { + in.close(); + } + } + } + } finally { + out.close(); + } + + + if (deleteSource) { + return srcFS.delete(srcDir); + } else { + return true; + } + } /** Copy local files to a FileSystem. */ public static boolean copy(File src, @@ -142,7 +181,13 @@ } private static void copyContent(InputStream in, OutputStream out, - Configuration conf) throws IOException { + Configuration conf) throws IOException { + copyContent(in, out, conf, true); + } + + + private static void copyContent(InputStream in, OutputStream out, + Configuration conf, boolean close) throws IOException { byte buf[] = new byte[conf.getInt("io.file.buffer.size", 4096)]; try { int bytesRead = in.read(buf); @@ -151,7 +196,8 @@ bytesRead = in.read(buf); } } finally { - out.close(); + if(close) + out.close(); } }