hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cdoug...@apache.org
Subject svn commit: r816088 - in /hadoop/mapreduce/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/LineRecordReader.java src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java src/test/mapred/org/apache/hadoop/mapred/TestTextInputFormat.java
Date Thu, 17 Sep 2009 08:06:35 GMT
Author: cdouglas
Date: Thu Sep 17 08:06:34 2009
New Revision: 816088

URL: http://svn.apache.org/viewvc?rev=816088&view=rev
Log:
MAPREDUCE-946. Fix a regression in LineRecordReader where the
maxBytesToConsume parameter is not set correctly.

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LineRecordReader.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTextInputFormat.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=816088&r1=816087&r2=816088&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Thu Sep 17 08:06:34 2009
@@ -609,3 +609,6 @@
     MAPREDUCE-648. Fix two distcp bugs: (1) it should not launch a job if all
     src paths are directories, and (2) it does not skip copying when updating
     a single file.  (Ravi Gummadi via szetszwo)
+
+    MAPREDUCE-946. Fix a regression in LineRecordReader where the
+    maxBytesToConsume parameter is not set correctly. (cdouglas)

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LineRecordReader.java?rev=816088&r1=816087&r2=816088&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LineRecordReader.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/LineRecordReader.java Thu Sep 17 08:06:34 2009
@@ -112,7 +112,7 @@
     // because we always (except the last split) read one extra line in
     // next() method.
     if (start != 0) {
-      start += in.readLine(new Text(), 0, maxBytesToConsume());
+      start += in.readLine(new Text(), 0, maxBytesToConsume(start));
     }
     this.pos = start;
   }
@@ -147,12 +147,16 @@
     return new Text();
   }
   
-  private boolean isCompressedInput() { return (codec != null); }
-  
-  private int maxBytesToConsume() {
-    return (isCompressedInput()) ? Integer.MAX_VALUE
-                           : (int) Math.min(Integer.MAX_VALUE, (end - start));
+  private boolean isCompressedInput() {
+    return (codec != null);
+  }
+
+  private int maxBytesToConsume(long pos) {
+    return isCompressedInput()
+      ? Integer.MAX_VALUE
+      : (int) Math.min(Integer.MAX_VALUE, end - pos);
   }
+
   private long getFilePosition() throws IOException {
     long retVal;
     if (isCompressedInput() && null != filePosition) {
@@ -174,7 +178,7 @@
       key.set(pos);
 
       int newSize = in.readLine(value, maxLineLength,
-                                Math.max(maxBytesToConsume(), maxLineLength));
+          Math.max(maxBytesToConsume(pos), maxLineLength));
       if (newSize == 0) {
         return false;
       }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java?rev=816088&r1=816087&r2=816088&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java Thu Sep 17 08:06:34 2009
@@ -104,7 +104,7 @@
     // because we always (except the last split) read one extra line in
     // next() method.
     if (start != 0) {
-      start += in.readLine(new Text(), 0, maxBytesToConsume());
+      start += in.readLine(new Text(), 0, maxBytesToConsume(start));
     }
     this.pos = start;
   }
@@ -113,12 +113,12 @@
     return (codec != null);
   }
 
-  private int maxBytesToConsume() {
+  private int maxBytesToConsume(long pos) {
     return isCompressedInput()
       ? Integer.MAX_VALUE
-      : (int) Math.min(Integer.MAX_VALUE, (end - start));
+      : (int) Math.min(Integer.MAX_VALUE, end - pos);
   }
-  
+
   private long getFilePosition() throws IOException {
     long retVal;
     if (isCompressedInput() && null != filePosition) {
@@ -142,7 +142,7 @@
     // split limit i.e. (end - 1)
     while (getFilePosition() <= end) {
       newSize = in.readLine(value, maxLineLength,
-                            Math.max(maxBytesToConsume(), maxLineLength));
+          Math.max(maxBytesToConsume(pos), maxLineLength));
       if (newSize == 0) {
         break;
       }

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTextInputFormat.java?rev=816088&r1=816087&r2=816088&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTextInputFormat.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTextInputFormat.java Thu Sep 17 08:06:34 2009
@@ -20,14 +20,17 @@
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.util.BitSet;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.LongWritable;
@@ -331,6 +334,37 @@
     }
   }
 
+  @Test
+  public void testMRMaxLine() throws Exception {
+    final int MAXPOS = 1024 * 1024;
+    final int MAXLINE = 10 * 1024;
+    final int BUF = 64 * 1024;
+    final InputStream infNull = new InputStream() {
+      int position = 0;
+      final int MAXPOSBUF = 1024 * 1024 + BUF; // max LRR pos + LineReader buf
+      @Override
+      public int read() {
+        ++position;
+        return 0;
+      }
+      @Override
+      public int read(byte[] b) {
+        assertTrue("Read too many bytes from the stream", position < MAXPOSBUF);
+        Arrays.fill(b, (byte) 0);
+        position += b.length;
+        return b.length;
+      }
+    };
+    final LongWritable key = new LongWritable();
+    final Text val = new Text();
+    LOG.info("Reading a line from /dev/null");
+    final Configuration conf = new Configuration(false);
+    conf.setInt("mapred.linerecordreader.maxlength", MAXLINE);
+    conf.setInt("io.file.buffer.size", BUF); // used by LRR
+    final LineRecordReader lrr = new LineRecordReader(infNull, 0, MAXPOS, conf);
+    assertFalse("Read a line from null", lrr.next(key, val));
+  }
+
   private static void writeFile(FileSystem fs, Path name, 
                                 CompressionCodec codec,
                                 String contents) throws IOException {



Mime
View raw message