Return-Path: Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: (qmail 15835 invoked from network); 13 Jan 2010 10:47:15 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 13 Jan 2010 10:47:15 -0000 Received: (qmail 26183 invoked by uid 500); 13 Jan 2010 10:47:15 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 26122 invoked by uid 500); 13 Jan 2010 10:47:15 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 26113 invoked by uid 99); 13 Jan 2010 10:47:15 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 13 Jan 2010 10:47:15 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 13 Jan 2010 10:47:12 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 4916E23889E9; Wed, 13 Jan 2010 10:46:51 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r898712 - in /hadoop/common/branches/branch-0.21: CHANGES.txt src/java/org/apache/hadoop/io/compress/GzipCodec.java src/test/core/org/apache/hadoop/io/compress/TestCodec.java Date: Wed, 13 Jan 2010 10:46:51 -0000 To: common-commits@hadoop.apache.org From: cdouglas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100113104651.4916E23889E9@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cdouglas Date: Wed Jan 13 10:46:50 2010 New Revision: 898712 URL: http://svn.apache.org/viewvc?rev=898712&view=rev Log: HADOOP-6315. 
Avoid incorrect use of BuiltInflater/BuiltInDeflater in GzipCodec. Contributed by Aaron Kimball Modified: hadoop/common/branches/branch-0.21/CHANGES.txt hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/compress/GzipCodec.java hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/compress/TestCodec.java Modified: hadoop/common/branches/branch-0.21/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.21/CHANGES.txt?rev=898712&r1=898711&r2=898712&view=diff ============================================================================== --- hadoop/common/branches/branch-0.21/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.21/CHANGES.txt Wed Jan 13 10:46:50 2010 @@ -1163,7 +1163,9 @@ HADOOP-6460. Reinitializes buffers used for serializing responses in ipc server on exceeding maximum response size to free up Java heap. (suresh) - + + HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in + GzipCodec. (Aaron Kimball via cdouglas) Release 0.20.1 - 2009-09-01 Modified: hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/compress/GzipCodec.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/compress/GzipCodec.java?rev=898712&r1=898711&r2=898712&view=diff ============================================================================== --- hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/compress/GzipCodec.java (original) +++ hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/compress/GzipCodec.java Wed Jan 13 10:46:50 2010 @@ -165,7 +165,7 @@ public Class getCompressorType() { return ZlibFactory.isNativeZlibLoaded(conf) ? GzipZlibCompressor.class - : BuiltInZlibDeflater.class; + : null; } public CompressionInputStream createInputStream(InputStream in) @@ -196,7 +196,7 @@ public Class getDecompressorType() { return ZlibFactory.isNativeZlibLoaded(conf) ? 
GzipZlibDecompressor.class - : BuiltInZlibInflater.class; + : null; } public String getDefaultExtension() { Modified: hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/compress/TestCodec.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/compress/TestCodec.java?rev=898712&r1=898711&r2=898712&view=diff ============================================================================== --- hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/compress/TestCodec.java (original) +++ hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/compress/TestCodec.java Wed Jan 13 10:46:50 2010 @@ -19,13 +19,24 @@ import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.util.Arrays; import java.util.Random; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -41,6 +52,9 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.CompressorStream; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater; import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel; import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy; import 
org.apache.hadoop.io.compress.zlib.ZlibFactory; @@ -418,4 +432,154 @@ } + @Test + public void testCodecPoolAndGzipDecompressor() { + // BuiltInZlibInflater should not be used as the GzipCodec decompressor. + // Assert that this is the case. + + // Don't use native libs for this test. + Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", false); + assertFalse("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + + // This should give us a BuiltInZlibInflater. + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + assertNotNull("zlibDecompressor is null!", zlibDecompressor); + assertTrue("ZlibFactory returned unexpected inflator", + zlibDecompressor instanceof BuiltInZlibInflater); + + // Asking for a decompressor directly from GzipCodec should return null; + // its createInputStream() just wraps the existing stream in a + // java.util.zip.GZIPInputStream. + CompressionCodecFactory ccf = new CompressionCodecFactory(conf); + CompressionCodec codec = ccf.getCodec(new Path("foo.gz")); + assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec); + Decompressor codecDecompressor = codec.createDecompressor(); + if (null != codecDecompressor) { + fail("Got non-null codecDecompressor: " + codecDecompressor); + } + + // Asking the CodecPool for a decompressor for GzipCodec + // should return null as well. + Decompressor poolDecompressor = CodecPool.getDecompressor(codec); + if (null != poolDecompressor) { + fail("Got non-null poolDecompressor: " + poolDecompressor); + } + + // If we then ensure that the pool is populated... + CodecPool.returnDecompressor(zlibDecompressor); + + // Asking the pool another time should still not bind this to GzipCodec. 
+ poolDecompressor = CodecPool.getDecompressor(codec); + if (null != poolDecompressor) { + fail("Second time, got non-null poolDecompressor: " + + poolDecompressor); + } + } + + @Test + public void testGzipCodecRead() throws IOException { + // Create a gzipped file and try to read it back, using a decompressor + // from the CodecPool. + + // Don't use native libs for this test. + Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", false); + assertFalse("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + + // Ensure that the CodecPool has a BuiltInZlibInflater in it. + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + assertNotNull("zlibDecompressor is null!", zlibDecompressor); + assertTrue("ZlibFactory returned unexpected inflator", + zlibDecompressor instanceof BuiltInZlibInflater); + CodecPool.returnDecompressor(zlibDecompressor); + + // Now create a GZip text file. + String tmpDir = System.getProperty("test.build.data", "/tmp/"); + Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz"); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( + new GZIPOutputStream(new FileOutputStream(f.toString())))); + final String msg = "This is the message in the file!"; + bw.write(msg); + bw.close(); + + // Now read it back, using the CodecPool to establish the + // decompressor to use. 
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf); + CompressionCodec codec = ccf.getCodec(f); + Decompressor decompressor = CodecPool.getDecompressor(codec); + FileSystem fs = FileSystem.getLocal(conf); + InputStream is = fs.open(f); + is = codec.createInputStream(is, decompressor); + BufferedReader br = new BufferedReader(new InputStreamReader(is)); + String line = br.readLine(); + assertEquals("Didn't get the same message back!", msg, line); + br.close(); + } + + private void verifyGzipFile(String filename, String msg) throws IOException { + BufferedReader r = new BufferedReader(new InputStreamReader( + new GZIPInputStream(new FileInputStream(filename)))); + try { + String line = r.readLine(); + assertEquals("Got invalid line back from " + filename, msg, line); + } finally { + r.close(); + new File(filename).delete(); + } + } + + @Test + public void testGzipCodecWrite() throws IOException { + // Create a gzipped file using a compressor from the CodecPool, + // and try to read it back via the regular GZIPInputStream. + + // Don't use native libs for this test. + Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", false); + assertFalse("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + + // Ensure that the CodecPool has a BuiltInZlibDeflater in it. + Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf); + assertNotNull("zlibCompressor is null!", zlibCompressor); + assertTrue("ZlibFactory returned unexpected deflator", + zlibCompressor instanceof BuiltInZlibDeflater); + CodecPool.returnCompressor(zlibCompressor); + + // Create a GZIP text file via the Compressor interface. 
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf); + CompressionCodec codec = ccf.getCodec(new Path("foo.gz")); + assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec); + + final String msg = "This is the message we are going to compress."; + final String tmpDir = System.getProperty("test.build.data", "/tmp/"); + final String fileName = new Path(new Path(tmpDir), + "testGzipCodecWrite.txt.gz").toString(); + + BufferedWriter w = null; + Compressor gzipCompressor = CodecPool.getCompressor(codec); + if (null != gzipCompressor) { + // If it gives us back a Compressor, we should be able to use this + // to write files we can then read back with Java's gzip tools. + OutputStream os = new CompressorStream(new FileOutputStream(fileName), + gzipCompressor); + w = new BufferedWriter(new OutputStreamWriter(os)); + w.write(msg); + w.close(); + CodecPool.returnCompressor(gzipCompressor); + + verifyGzipFile(fileName, msg); + } + + // Create a gzip text file via codec.createOutputStream(). + w = new BufferedWriter(new OutputStreamWriter( + codec.createOutputStream(new FileOutputStream(fileName)))); + w.write(msg); + w.close(); + + verifyGzipFile(fileName, msg); + } }