Return-Path: Delivered-To: apmail-hadoop-core-commits-archive@www.apache.org Received: (qmail 24554 invoked from network); 26 Mar 2009 01:28:58 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 26 Mar 2009 01:28:58 -0000 Received: (qmail 15371 invoked by uid 500); 26 Mar 2009 00:22:47 -0000 Delivered-To: apmail-hadoop-core-commits-archive@hadoop.apache.org Received: (qmail 15346 invoked by uid 500); 26 Mar 2009 00:22:46 -0000 Mailing-List: contact core-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: core-dev@hadoop.apache.org Delivered-To: mailing list core-commits@hadoop.apache.org Received: (qmail 15332 invoked by uid 99); 26 Mar 2009 00:22:46 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 26 Mar 2009 00:22:46 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 26 Mar 2009 00:22:39 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 927E52388B32; Thu, 26 Mar 2009 00:22:19 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r758479 - in /hadoop/core/trunk: CHANGES.txt src/core/org/apache/hadoop/io/SequenceFile.java src/test/org/apache/hadoop/io/TestSequenceFile.java Date: Thu, 26 Mar 2009 00:22:19 -0000 To: core-commits@hadoop.apache.org From: cdouglas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090326002219.927E52388B32@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cdouglas Date: Thu Mar 26 00:22:18 2009 New Revision: 758479 URL: http://svn.apache.org/viewvc?rev=758479&view=rev Log: HADOOP-5423. Include option of preserving file metadata in SequenceFile::sort. Contributed by Michael Tamm Modified: hadoop/core/trunk/CHANGES.txt hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java Modified: hadoop/core/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=758479&r1=758478&r2=758479&view=diff ============================================================================== --- hadoop/core/trunk/CHANGES.txt (original) +++ hadoop/core/trunk/CHANGES.txt Thu Mar 26 00:22:18 2009 @@ -184,6 +184,9 @@ HADOOP-5491. In contrib/index, better control memory usage. (Ning Li via cutting) + HADOOP-5423. Include option of preserving file metadata in + SequenceFile::sort. (Michael Tamm via cdouglas) + OPTIMIZATIONS BUG FIXES Modified: hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java?rev=758479&r1=758478&r2=758479&view=diff ============================================================================== --- hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java (original) +++ hadoop/core/trunk/src/core/org/apache/hadoop/io/SequenceFile.java Thu Mar 26 00:22:18 2009 @@ -2233,6 +2233,7 @@ private Class valClass; private Configuration conf; + private Metadata metadata; private Progressable progressable = null; @@ -2245,6 +2246,12 @@ /** Sort and merge using an arbitrary {@link RawComparator}. */ public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf) { + this(fs, comparator, keyClass, valClass, conf, new Metadata()); + } + + /** Sort and merge using an arbitrary {@link RawComparator}. */ + public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, + Class valClass, Configuration conf, Metadata metadata) { this.fs = fs; this.comparator = comparator; this.keyClass = keyClass; @@ -2252,6 +2259,7 @@ this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024; this.factor = conf.getInt("io.sort.factor", 100); this.conf = conf; + this.metadata = metadata; } /** Set the number of streams to merge at once.*/ @@ -2495,7 +2503,7 @@ long segmentStart = out.getPos(); Writer writer = createWriter(conf, out, keyClass, valClass, isCompressed, isBlockCompressed, codec, - new Metadata()); + done ? metadata : new Metadata()); if (!done) { writer.sync = null; // disable sync on temp files Modified: hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java?rev=758479&r1=758478&r2=758479&view=diff ============================================================================== --- hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java (original) +++ hadoop/core/trunk/src/test/org/apache/hadoop/io/TestSequenceFile.java Thu Mar 26 00:22:18 2009 @@ -307,10 +307,10 @@ public void testSequenceFileMetadata() throws Exception { LOG.info("Testing SequenceFile with metadata"); int count = 1024 * 10; - int megabytes = 1; - int factor = 5; CompressionCodec codec = new DefaultCodec(); Path file = new Path(System.getProperty("test.build.data",".")+"/test.seq.metadata"); + Path sortedFile = + new Path(System.getProperty("test.build.data",".")+"/test.sorted.seq.metadata"); Path recordCompressedFile = new Path(System.getProperty("test.build.data",".")+"/test.rc.seq.metadata"); Path blockCompressedFile = @@ -352,6 +352,14 @@ LOG.info("The retrieved metadata:\n" + aMetadata.toString()); throw new RuntimeException("metadata not match: " + 3); } + // SequenceFile.Sorter + sortMetadataTest(fs, file, sortedFile, theMetadata); + aMetadata = readMetadata(fs, recordCompressedFile); + if (!theMetadata.equals(aMetadata)) { + LOG.info("The original metadata:\n" + theMetadata.toString()); + LOG.info("The retrieved metadata:\n" + aMetadata.toString()); + throw new RuntimeException("metadata not match: " + 4); + } } finally { fs.close(); } @@ -361,7 +369,7 @@ private static SequenceFile.Metadata readMetadata(FileSystem fs, Path file) throws IOException { - LOG.info("reading file: " + file.toString() + "\n"); + LOG.info("reading file: " + file.toString()); SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf); SequenceFile.Metadata meta = reader.getMetadata(); reader.close(); @@ -372,7 +380,7 @@ CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata) throws IOException { fs.delete(file, true); - LOG.info("creating " + count + " records with metadata and with" + compressionType + + LOG.info("creating " + count + " records with metadata and with " + compressionType + " compression"); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, @@ -388,6 +396,15 @@ writer.close(); } + private static void sortMetadataTest(FileSystem fs, Path unsortedFile, Path sortedFile, SequenceFile.Metadata metadata) + throws IOException { + fs.delete(sortedFile, true); + LOG.info("sorting: " + unsortedFile + " to: " + sortedFile); + final WritableComparator comparator = WritableComparator.get(RandomDatum.class); + SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, comparator, RandomDatum.class, RandomDatum.class, conf, metadata); + sorter.sort(new Path[] { unsortedFile }, sortedFile, false); + } + public void testClose() throws IOException { Configuration conf = new Configuration(); LocalFileSystem fs = new LocalFileSystem();