hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sur...@apache.org
Subject svn commit: r1335703 - in /hadoop/common/branches/branch-1: CHANGES.txt src/core/org/apache/hadoop/io/BloomMapFile.java src/test/org/apache/hadoop/io/TestBloomMapFile.java
Date Tue, 08 May 2012 19:08:54 GMT
Author: suresh
Date: Tue May  8 19:08:53 2012
New Revision: 1335703

URL: http://svn.apache.org/viewvc?rev=1335703&view=rev
Log:
HADOOP-6546. BloomMapFile can return false negatives. Contributed by Clark Jefcoat.

Modified:
    hadoop/common/branches/branch-1/CHANGES.txt
    hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/BloomMapFile.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/io/TestBloomMapFile.java

Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1335703&r1=1335702&r2=1335703&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Tue May  8 19:08:53 2012
@@ -228,6 +228,9 @@ Release 1.1.0 - unreleased
     MAPREDUCE-4088. Task stuck in JobLocalizer prevented other tasks on the
     same node from (Ravi Prakash via bobby)
 
+    HADOOP-6546. BloomMapFile can return false negatives. (Clark Jefcoat
+    via suresh)
+
 Release 1.0.3 - 2012.05.07
 
   NEW FEATURES

Modified: hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/BloomMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/BloomMapFile.java?rev=1335703&r1=1335702&r2=1335703&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/BloomMapFile.java (original)
+++ hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/BloomMapFile.java Tue May  8 19:08:53 2012
@@ -58,6 +58,16 @@ public class BloomMapFile {
     fs.delete(bloom, true);
     fs.delete(dir, true);
   }
+
+  private static byte[] byteArrayForBloomKey(DataOutputBuffer buf) {
+    int cleanLength = buf.getLength();
+    byte [] ba = buf.getData();
+    if (cleanLength != ba.length) {
+      ba = new byte[cleanLength];
+      System.arraycopy(buf.getData(), 0, ba, 0, cleanLength);
+    }
+    return ba;
+  }
   
   public static class Writer extends MapFile.Writer {
     private DynamicBloomFilter bloomFilter;
@@ -163,7 +173,7 @@ public class BloomMapFile {
       super.append(key, val);
       buf.reset();
       key.write(buf);
-      bloomKey.set(buf.getData(), 1.0);
+      bloomKey.set(byteArrayForBloomKey(buf), 1.0);
       bloomFilter.add(bloomKey);
     }
 
@@ -228,7 +238,7 @@ public class BloomMapFile {
       }
       buf.reset();
       key.write(buf);
-      bloomKey.set(buf.getData(), 1.0);
+      bloomKey.set(byteArrayForBloomKey(buf), 1.0);
       return bloomFilter.membershipTest(bloomKey);
     }
     

Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/io/TestBloomMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/io/TestBloomMapFile.java?rev=1335703&r1=1335702&r2=1335703&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/io/TestBloomMapFile.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/io/TestBloomMapFile.java Tue May  8 19:08:53 2012
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.io;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -67,4 +71,41 @@ public class TestBloomMapFile extends Te
     assertTrue(falsePos < 2);
   }
 
+  private void checkMembershipVaryingSizedKeys(String name, List<Text> keys) throws Exception {
+    Path dirName = new Path(System.getProperty("test.build.data",".") +
+        name + ".bloommapfile"); 
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path qualifiedDirName = fs.makeQualified(dirName);
+    BloomMapFile.Writer writer = new BloomMapFile.Writer(conf, fs,
+      qualifiedDirName.toString(), Text.class, NullWritable.class);
+    for (Text key : keys) {
+      writer.append(key, NullWritable.get());
+    }
+    writer.close();
+
+    // will check for membership in the opposite order of how keys were inserted
+    BloomMapFile.Reader reader = new BloomMapFile.Reader(fs,
+        qualifiedDirName.toString(), conf);
+    Collections.reverse(keys);
+    for (Text key : keys) {
+      assertTrue("False negative for existing key " + key, reader.probablyHasKey(key));
+    }
+    reader.close();
+    fs.delete(qualifiedDirName, true);
+  }
+
+  public void testMembershipVaryingSizedKeysTest1() throws Exception {
+    ArrayList<Text> list = new ArrayList<Text>();
+    list.add(new Text("A"));
+    list.add(new Text("BB"));
+    checkMembershipVaryingSizedKeys(getName(), list);
+  }
+
+  public void testMembershipVaryingSizedKeysTest2() throws Exception {
+    ArrayList<Text> list = new ArrayList<Text>();
+    list.add(new Text("AA"));
+    list.add(new Text("B"));
+    checkMembershipVaryingSizedKeys(getName(), list);
+  }
+
 }



Mime
View raw message