hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cdoug...@apache.org
Subject svn commit: r796148 - in /hadoop/mapreduce/trunk: CHANGES.txt src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java
Date Tue, 21 Jul 2009 05:16:58 GMT
Author: cdouglas
Date: Tue Jul 21 05:16:57 2009
New Revision: 796148

URL: http://svn.apache.org/viewvc?rev=796148&view=rev
Log:
MAPREDUCE-772. Merge HADOOP-4010 changes to LineRecordReader into mapreduce package. Contributed by Abdul Qadeer

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=796148&r1=796147&r2=796148&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Jul 21 05:16:57 2009
@@ -126,6 +126,9 @@
     MAPREDUCE-739. Allow relative paths to be created in archives. (Mahadev
     Konar via cdouglas)
 
+    MAPREDUCE-772. Merge HADOOP-4010 changes to LineRecordReader into mapreduce
+    package. (Abdul Qadeer via cdouglas)
+
   BUG FIXES
     MAPREDUCE-703. Sqoop requires dependency on hsqldb in ivy.
     (Aaron Kimball via matei)

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java?rev=796148&r1=796147&r2=796148&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java Tue Jul 21 05:16:57 2009
@@ -70,21 +70,19 @@
     // open the file and seek to the start of the split
     FileSystem fs = file.getFileSystem(job);
     FSDataInputStream fileIn = fs.open(split.getPath());
-    boolean skipFirstLine = false;
     if (codec != null) {
       in = new LineReader(codec.createInputStream(fileIn), job);
       end = Long.MAX_VALUE;
     } else {
-      if (start != 0) {
-        skipFirstLine = true;
-        --start;
-        fileIn.seek(start);
-      }
+      fileIn.seek(start);
       in = new LineReader(fileIn, job);
     }
-    if (skipFirstLine) {  // skip first line and re-establish "start".
-      start += in.readLine(new Text(), 0,
-                           (int)Math.min((long)Integer.MAX_VALUE, end - start));
+    // If this is not the first split, we always throw away first record
+    // because we always (except the last split) read one extra line in
+    // next() method.
+    if (start != 0) {
+      start += in.readLine(new Text(), 0, (int) Math.min(
+          (long) Integer.MAX_VALUE, end - start));
     }
     this.pos = start;
   }
@@ -98,7 +96,9 @@
       value = new Text();
     }
     int newSize = 0;
-    while (pos < end) {
+    // We always read one extra line, which lies outside the upper
+    // split limit i.e. (end - 1)
+    while (pos <= end) {
       newSize = in.readLine(value, maxLineLength,
                             Math.max((int)Math.min(Integer.MAX_VALUE, end-pos),
                                      maxLineLength));



Mime
View raw message