Author: cutting
Date: Wed Sep 27 14:44:08 2006
New Revision: 450586
URL: http://svn.apache.org/viewvc?view=rev&rev=450586
Log:
HADOOP-508. Fix a bug in FSDataInputStream where incorrect data was sometimes returned after
seeking to a random location. Contributed by Milind.
Added:
lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSDataInputStream.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=450586&r1=450585&r2=450586
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Sep 27 14:44:08 2006
@@ -80,6 +80,10 @@
the FileSystem API, and use it in DFSShell ('bin/hadoop dfs')
commands. (Hairong Kuang via cutting)
+21. HADOOP-508. Fix a bug in FSDataInputStream. Incorrect data was
+ returned after seeking to a random location.
+ (Milind Bhandarkar via cutting)
+
Release 0.6.2 (unreleased)
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSDataInputStream.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSDataInputStream.java?view=diff&rev=450586&r1=450585&r2=450586
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSDataInputStream.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSDataInputStream.java Wed Sep 27 14:44:08 2006
@@ -193,7 +193,8 @@
public void seek(long desired) throws IOException {
long end = ((PositionCache)in).getPos();
long start = end - this.count;
- if (desired >= start && desired < end) {
+ int avail = this.count - this.pos;
+ if (desired >= start && desired < end && avail > 0) {
this.pos = (int)(desired - start); // can position within buffer
} else {
this.count = 0; // invalidate buffer
Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java?view=auto&rev=450586
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestSeekBug.java Wed Sep 27 14:44:08 2006
@@ -0,0 +1,104 @@
+package org.apache.hadoop.dfs;
+
+import javax.swing.filechooser.FileSystemView;
+import junit.framework.TestCase;
+import java.io.*;
+import java.util.Random;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Regression test for the seek bug described in HADOOP-508: after a read
+ * that is larger than the stream's internal buffer, seeking back to an
+ * offset that has already been read must still return the bytes stored at
+ * that offset. The same scenario is run against a MiniDFSCluster and
+ * against the local file system.
+ * @author Milind Bhandarkar
+ */
+public class TestSeekBug extends TestCase {
+  /** Seed shared by the writer and the reader so both generate the same bytes. */
+  static final long seed = 0xDEADBEEFL;
+  /** Size of the test file in bytes (1MB). */
+  static final int ONEMB = 1 << 20;
+
+  /**
+   * Creates <code>name</code> and fills it with 1MB of pseudo-random bytes
+   * generated from {@link #seed}.
+   */
+  private void writeFile(FileSystem fileSys, Path name) throws IOException {
+    byte[] buffer = new byte[ONEMB];
+    Random rand = new Random(seed);
+    rand.nextBytes(buffer);
+    // create and write a file that contains 1MB
+    DataOutputStream stm = fileSys.create(name);
+    try {
+      stm.write(buffer);
+    } finally {
+      stm.close(); // do not leave the stream open if the write fails
+    }
+  }
+
+  /**
+   * Asserts that every byte of <code>actual</code> equals the byte of
+   * <code>expected</code> found at offset <code>from</code> plus its index,
+   * and zeroes each byte of <code>actual</code> once it has been verified.
+   */
+  private void checkAndEraseData(byte[] actual, int from, byte[] expected, String message) {
+    for (int idx = 0; idx < actual.length; idx++) {
+      // JUnit's argument order is (message, expected, actual)
+      assertEquals(message+" byte "+(from+idx)+" differs. expected "+
+                   expected[from+idx]+" actual "+actual[idx],
+                   expected[from+idx], actual[idx]);
+      actual[idx] = 0;
+    }
+  }
+
+  /**
+   * Opens <code>name</code> through an FSDataInputStream with a 4096 byte
+   * buffer, performs a small read, a read larger than that buffer and a
+   * seek back into the range already read, and verifies the data returned
+   * against the bytes regenerated from {@link #seed}.
+   */
+  private void seekReadFile(FileSystem fileSys, Path name) throws IOException {
+    byte[] expected = new byte[ONEMB];
+    Random rand = new Random(seed);
+    rand.nextBytes(expected);
+
+    FSInputStream stmRaw = fileSys.openRaw(name);
+    FSDataInputStream stm = new FSDataInputStream(stmRaw, 4096);
+    try {
+      // First read 128 bytes to set count in BufferedInputStream
+      byte[] actual = new byte[128];
+      assertEquals("Initial Read length",
+                   actual.length, stm.read(actual, 0, actual.length));
+      // Now read a byte array that is bigger than the internal buffer
+      actual = new byte[100000];
+      assertEquals("First Read Test length",
+                   actual.length, stm.read(actual, 0, actual.length));
+      checkAndEraseData(actual, 128, expected, "First Read Test");
+      // now do a small seek, within the range that is already read
+      stm.seek(96036); // 4 byte seek
+      actual = new byte[128];
+      assertEquals("Seek Bug length",
+                   actual.length, stm.read(actual, 0, actual.length));
+      checkAndEraseData(actual, 96036, expected, "Seek Bug");
+    } finally {
+      // all done; close even when an assertion above has failed
+      stm.close();
+    }
+  }
+
+  /**
+   * Asserts that <code>name</code> exists, deletes it, and asserts that it
+   * no longer exists.
+   */
+  private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
+    assertTrue(fileSys.exists(name));
+    fileSys.delete(name);
+    assertTrue(!fileSys.exists(name));
+  }
+
+  /**
+   * Test if the seek bug exists in FSDataInputStream in DFS.
+   */
+  public void testSeekBugDFS() throws IOException {
+    Configuration conf = new Configuration();
+    MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, false);
+    try {
+      FileSystem fileSys = cluster.getFileSystem();
+      try {
+        Path file1 = new Path("seektest.dat");
+        writeFile(fileSys, file1);
+        seekReadFile(fileSys, file1);
+        cleanupFile(fileSys, file1);
+      } finally {
+        fileSys.close();
+      }
+    } finally {
+      // shut the cluster down even if closing the file system throws
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * Tests if the seek bug exists in FSDataInputStream in LocalFS.
+   */
+  public void testSeekBugLocalFS() throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fileSys = FileSystem.getNamed("local", conf);
+    try {
+      Path file1 = new Path("build/test/data", "seektest.dat");
+      writeFile(fileSys, file1);
+      seekReadFile(fileSys, file1);
+      cleanupFile(fileSys, file1);
+    } finally {
+      fileSys.close();
+    }
+  }
+}
|