hive-commits mailing list archives

From omal...@apache.org
Subject [1/5] hive git commit: HIVE-12055. Move WriterImpl over to orc module.
Date Mon, 14 Dec 2015 21:36:14 GMT
Repository: hive
Updated Branches:
  refs/heads/master 49dc6452a -> 06e39ebe0


http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java b/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
deleted file mode 100644
index 151f30d..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.util;
-
-/**
- * Estimation of the memory footprint of objects
- */
-public enum JavaDataModel {
-
-  JAVA32 {
-    @Override
-    public int object() {
-      return JAVA32_OBJECT;
-    }
-
-    @Override
-    public int array() {
-      return JAVA32_ARRAY;
-    }
-
-    @Override
-    public int ref() {
-      return JAVA32_REF;
-    }
-
-    @Override
-    public int hashMap(int entry) {
-      // base  = JAVA32_OBJECT + PRIMITIVES1 * 4 + JAVA32_FIELDREF * 3 + JAVA32_ARRAY;
-      // entry = JAVA32_OBJECT + JAVA32_FIELDREF + PRIMITIVES1
-      return hashMapBase() + hashMapEntry() * entry;
-    }
-
-    @Override
-    public int hashMapBase() {
-      return 64;
-    }
-
-    @Override
-    public int hashMapEntry() {
-      return 24;
-    }
-
-    @Override
-    public int hashSet(int entry) {
-      // hashMap += JAVA32_OBJECT
-      return hashSetBase() + hashSetEntry() * entry;
-    }
-
-    @Override
-    public int hashSetBase() {
-      return 80;
-    }
-
-    @Override
-    public int hashSetEntry() {
-      return 24;
-    }
-
-    @Override
-    public int linkedHashMap(int entry) {
-      // hashMap += JAVA32_FIELDREF + PRIMITIVES1
-      // hashMap.entry += JAVA32_FIELDREF * 2
-      return 72 + 32 * entry;
-    }
-
-    @Override
-    public int linkedList(int entry) {
-      // base  = JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_FIELDREF;
-      // entry = JAVA32_OBJECT + JAVA32_FIELDREF * 2
-      return linkedListBase() + linkedListEntry() * entry;
-     }
-
-     @Override
-     public int linkedListBase() {
-       return 28;
-     }
-
-     @Override
-     public int linkedListEntry() {
-       return 24;
-     }
-
-    @Override
-    public int arrayList() {
-      // JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_ARRAY;
-      return 44;
-    }
-
-    @Override
-    public int memoryAlign() {
-      return 8;
-    }
-  }, JAVA64 {
-    @Override
-    public int object() {
-      return JAVA64_OBJECT;
-    }
-
-    @Override
-    public int array() {
-      return JAVA64_ARRAY;
-    }
-
-    @Override
-    public int ref() {
-      return JAVA64_REF;
-    }
-
-    @Override
-    public int hashMap(int entry) {
-      // base  = JAVA64_OBJECT + PRIMITIVES1 * 4 + JAVA64_FIELDREF * 3 + JAVA64_ARRAY;
-      // entry = JAVA64_OBJECT + JAVA64_FIELDREF + PRIMITIVES1
-      return hashMapBase() + hashMapEntry() * entry;
-    }
-
-    @Override
-    public int hashMapBase() {
-      return 112;
-    }
-
-
-    @Override
-    public int hashMapEntry() {
-      return 44;
-    }
-
-    @Override
-    public int hashSet(int entry) {
-      // hashMap += JAVA64_OBJECT
-      return hashSetBase() + hashSetEntry() * entry;
-     }
-
-     @Override
-     public int hashSetBase() {
-       return 144;
-     }
-
-     @Override
-     public int hashSetEntry() {
-       return 44;
-     }
-
-    @Override
-    public int linkedHashMap(int entry) {
-      // hashMap += JAVA64_FIELDREF + PRIMITIVES1
-      // hashMap.entry += JAVA64_FIELDREF * 2
-      return 128 + 60 * entry;
-    }
-
-    @Override
-    public int linkedList(int entry) {
-      // base  = JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_FIELDREF;
-      // entry = JAVA64_OBJECT + JAVA64_FIELDREF * 2
-      return linkedListBase() + linkedListEntry() * entry;
-     }
-
-     @Override
-     public int linkedListBase() {
-       return 48;
-     }
-
-     @Override
-     public int linkedListEntry() {
-       return 48;
-     }
-
-    @Override
-    public int arrayList() {
-      // JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_ARRAY;
-      return 80;
-    }
-
-    @Override
-    public int memoryAlign() {
-      return 8;
-    }
-  };
-
-  public abstract int object();
-  public abstract int array();
-  public abstract int ref();
-  public abstract int hashMap(int entry);
-  public abstract int hashMapBase();
-  public abstract int hashMapEntry();
-  public abstract int hashSetBase();
-  public abstract int hashSetEntry();
-  public abstract int hashSet(int entry);
-  public abstract int linkedHashMap(int entry);
-  public abstract int linkedListBase();
-  public abstract int linkedListEntry();
-  public abstract int linkedList(int entry);
-  public abstract int arrayList();
-  public abstract int memoryAlign();
-
-  // ascii string
-  public int lengthFor(String string) {
-    return lengthForStringOfLength(string.length());
-  }
-
-  public int lengthForRandom() {
-    // boolean + double + AtomicLong
-    return object() + primitive1() + primitive2() + object() + primitive2();
-  }
-
-  public int primitive1() {
-    return PRIMITIVES1;
-  }
-  public int primitive2() {
-    return PRIMITIVES2;
-  }
-
-  public static int alignUp(int value, int align) {
-    return (value + align - 1) & ~(align - 1);
-  }
-
-  public static final int JAVA32_META = 12;
-  public static final int JAVA32_ARRAY_META = 16;
-  public static final int JAVA32_REF = 4;
-  public static final int JAVA32_OBJECT = 16;   // JAVA32_META + JAVA32_REF
-  public static final int JAVA32_ARRAY = 20;    // JAVA32_ARRAY_META + JAVA32_REF
-
-  public static final int JAVA64_META = 24;
-  public static final int JAVA64_ARRAY_META = 32;
-  public static final int JAVA64_REF = 8;
-  public static final int JAVA64_OBJECT = 32;   // JAVA64_META + JAVA64_REF
-  public static final int JAVA64_ARRAY = 40;    // JAVA64_ARRAY_META + JAVA64_REF
-
-  public static final int PRIMITIVES1 = 4;      // void, boolean, byte, short, int, float
-  public static final int PRIMITIVES2 = 8;      // long, double
-
-  public static final int PRIMITIVE_BYTE = 1;    // byte
-
-  private static JavaDataModel current;
-
-  public static JavaDataModel get() {
-    if (current != null) {
-      return current;
-    }
-    try {
-      String props = System.getProperty("sun.arch.data.model");
-      if ("32".equals(props)) {
-        return current = JAVA32;
-      }
-    } catch (Exception e) {
-      // ignore
-    }
-    // TODO: separate model is needed for compressedOops, which can be guessed from memory size.
-    return current = JAVA64;
-  }
-
-  public static int round(int size) {
-    JavaDataModel model = get();
-    if (model == JAVA32 || size % 8 == 0) {
-      return size;
-    }
-    return ((size + 8) >> 3) << 3;
-  }
-
-  private int lengthForPrimitiveArrayOfSize(int primitiveSize, int length) {
-    return alignUp(array() + primitiveSize*length, memoryAlign());
-  }
-
-  public int lengthForByteArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
-  }
-  public int lengthForObjectArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(ref(), length);
-  }
-  public int lengthForLongArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(primitive2(), length);
-  }
-  public int lengthForDoubleArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(primitive2(), length);
-  }
-  public int lengthForIntArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(primitive1(), length);
-  }
-  public int lengthForBooleanArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
-  }
-  public int lengthForTimestampArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length);
-  }
-  public int lengthForDateArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(lengthOfDate(), length);
-  }
-  public int lengthForDecimalArrayOfSize(int length) {
-    return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length);
-  }
-
-  public int lengthOfDecimal() {
-    // object overhead + 8 bytes for intCompact + 4 bytes for precision
-    // + 4 bytes for scale + size of BigInteger
-    return object() + 2 * primitive2() + lengthOfBigInteger();
-  }
-
-  private int lengthOfBigInteger() {
-    // object overhead + 4 bytes for bitCount + 4 bytes for bitLength
-    // + 4 bytes for firstNonzeroByteNum + 4 bytes for firstNonzeroIntNum +
-    // + 4 bytes for lowestSetBit + 5 bytes for size of magnitude (since max precision
-    // is only 38 for HiveDecimal) + 7 bytes of padding (since java memory allocations
-    // are 8 byte aligned)
-    return object() + 4 * primitive2();
-  }
-
-  public int lengthOfTimestamp() {
-    // object overhead + 4 bytes for int (nanos) + 4 bytes of padding
-    return object() + primitive2();
-  }
-
-  public int lengthOfDate() {
-    // object overhead + 8 bytes for long (fastTime) + 16 bytes for cdate
-    return object() + 3 * primitive2();
-  }
-
-  public int lengthForStringOfLength(int strLen) {
-    return object() + primitive1() * 3 + array() + strLen;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
index 40674ea..554033c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
@@ -153,8 +153,14 @@ public class TestFileDump {
           (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
     conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 1000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .inspector(inspector)
+            .batchSize(1000)
+            .compress(CompressionKind.ZLIB)
+            .stripeSize(100000)
+            .rowIndexStride(1000));
     Random r1 = new Random(1);
     String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
         "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
@@ -263,8 +269,15 @@ public class TestFileDump {
     Configuration conf = new Configuration();
     conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
     conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 1000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .batchSize(1000)
+            .inspector(inspector)
+            .stripeSize(100000)
+            .compress(CompressionKind.ZLIB)
+            .rowIndexStride(1000)
+            .bufferSize(10000));
     Random r1 = new Random(1);
     String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
         "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
@@ -319,6 +332,7 @@ public class TestFileDump {
         .compress(CompressionKind.ZLIB)
         .bufferSize(10000)
         .rowIndexStride(1000)
+        .batchSize(1000)
         .bloomFilterColumns("S");
     Writer writer = OrcFile.createWriter(testFilePath, options);
     Random r1 = new Random(1);
@@ -368,7 +382,8 @@ public class TestFileDump {
         .bufferSize(10000)
         .rowIndexStride(1000)
         .bloomFilterColumns("l")
-        .bloomFilterFpp(0.01);
+        .bloomFilterFpp(0.01)
+        .batchSize(1000);
     Writer writer = OrcFile.createWriter(testFilePath, options);
     Random r1 = new Random(1);
     String[] words = new String[]{"It", "was", "the", "best", "of", "times,",

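For context on the hunks above: the old positional OrcFile.createWriter overload is replaced by the builder-style OrcFile.writerOptions API, and the tests now pin the new batchSize knob. A minimal before/after sketch, using only the options and values that appear in this diff:

    // Before: positional arguments (fs, path, conf, inspector,
    //         stripeSize, compression, bufferSize, rowIndexStride).
    // Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
    //     100000, CompressionKind.ZLIB, 10000, 1000);

    // After: named builder options; batchSize is the newly exposed knob.
    Writer writer = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf)
            .fileSystem(fs)
            .inspector(inspector)
            .stripeSize(100000)
            .compress(CompressionKind.ZLIB)
            .bufferSize(10000)
            .rowIndexStride(1000)
            .batchSize(1000));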
http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
index 2fd13c7..f41a7ba 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
@@ -1215,7 +1215,7 @@ public class TestNewIntegerEncoding {
         .encodingStrategy(encodingStrategy));
 
     List<Timestamp> tslist = Lists.newArrayList();
-    tslist.add(Timestamp.valueOf("9999-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2099-01-01 00:00:00"));
     tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
     tslist.add(Timestamp.valueOf("1999-01-01 00:00:00"));
     tslist.add(Timestamp.valueOf("1995-01-01 00:00:00"));

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index ebe3096..a7e657c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -655,7 +655,8 @@ public class TestOrcFile {
         OrcFile.writerOptions(conf)
             .inspector(inspector)
             .stripeSize(100000)
-            .bufferSize(10000));
+            .bufferSize(10000)
+            .batchSize(1000));
     for (int i = 0; i < 11000; i++) {
       if (i >= 5000) {
         if (i >= 10000) {
@@ -1260,6 +1261,7 @@ public class TestOrcFile {
                                          .inspector(inspector)
                                          .stripeSize(1000)
                                          .compress(CompressionKind.NONE)
+                                         .batchSize(1000)
                                          .bufferSize(100)
                                          .blockPadding(false));
     OrcStruct row = new OrcStruct(3);
@@ -1835,8 +1837,9 @@ public class TestOrcFile {
     @Override
     public void addedRow(int count) throws IOException {
       rows += count;
-      if (rows % 100 == 0) {
+      if (rows >= 100) {
         callback.checkMemory(rate);
+        rows = 0;
       }
     }
   }
@@ -1858,6 +1861,7 @@ public class TestOrcFile {
                                          .bufferSize(100)
                                          .rowIndexStride(0)
                                          .memory(memory)
+                                         .batchSize(100)
                                          .version(OrcFile.Version.V_0_11));
     assertEquals(testFilePath, memory.path);
     for(int i=0; i < 2500; ++i) {
@@ -1894,6 +1898,7 @@ public class TestOrcFile {
                                          .bufferSize(100)
                                          .rowIndexStride(0)
                                          .memory(memory)
+                                         .batchSize(100)
                                          .version(OrcFile.Version.V_0_12));
     assertEquals(testFilePath, memory.path);
     for(int i=0; i < 2500; ++i) {

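A note on the addedRow change above: once the writer reports rows to the memory-manager callback in batches, the running total can jump past an exact multiple of 100, so the old modulo test could fail to fire at all. The threshold-and-reset form is robust to arbitrary batch sizes. A sketch with a hypothetical batch count:

    rows += count;               // e.g. 0 -> 250 when count = 250
    // Old: 250 % 100 != 0, so checkMemory() would never be invoked.
    // New: 250 >= 100, so checkMemory() fires and the counter resets.
    if (rows >= 100) {
      callback.checkMemory(rate);
      rows = 0;
    }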
http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
index 966621c..ab1d2aa 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
@@ -867,7 +867,7 @@ public class TestOrcRawRecordMerger {
     Writer writer = OrcFile.createWriter(new Path(root, "0000010_0"),
         OrcFile.writerOptions(conf).inspector(inspector).fileSystem(fs)
         .blockPadding(false).bufferSize(10000).compress(CompressionKind.NONE)
-        .stripeSize(1).memory(mgr).version(OrcFile.Version.V_0_11));
+        .stripeSize(1).memory(mgr).batchSize(2).version(OrcFile.Version.V_0_11));
     String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3",
        "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"};
     for(int i=0; i < values.length; ++i) {
@@ -878,7 +878,8 @@ public class TestOrcRawRecordMerger {
     // write a delta
     AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
         .writingBase(false).minimumTransactionId(1).maximumTransactionId(1)
-        .bucket(BUCKET).inspector(inspector).filesystem(fs).recordIdColumn(5).finalDestination(root);
+        .bucket(BUCKET).inspector(inspector).filesystem(fs).recordIdColumn(5)
+        .finalDestination(root);
     RecordUpdater ru = of.getRecordUpdater(root, options);
     values = new String[]{"0.0", null, null, "1.1", null, null, null,
         "ignore.7"};
@@ -972,7 +973,7 @@ public class TestOrcRawRecordMerger {
         .bucket(BUCKET).inspector(inspector).filesystem(fs);
     options.orcOptions(OrcFile.writerOptions(conf)
       .stripeSize(1).blockPadding(false).compress(CompressionKind.NONE)
-      .memory(mgr));
+      .memory(mgr).batchSize(2));
     options.finalDestination(root);
     RecordUpdater ru = of.getRecordUpdater(root, options);
     String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3",
@@ -983,7 +984,8 @@ public class TestOrcRawRecordMerger {
     ru.close(false);
 
     // write a delta
-    options.writingBase(false).minimumTransactionId(1).maximumTransactionId(1).recordIdColumn(5);
+    options.writingBase(false).minimumTransactionId(1).maximumTransactionId(1)
+        .recordIdColumn(5);
     ru = of.getRecordUpdater(root, options);
     values = new String[]{"0.0", null, null, "1.1", null, null, null,
         "ignore.7"};
@@ -1020,7 +1022,7 @@ public class TestOrcRawRecordMerger {
 
     // loop through the 5 splits and read each
     for(int i=0; i < 4; ++i) {
-      System.out.println("starting split " + i);
+      System.out.println("starting split " + i + " = " + splits[i]);
       rr = inf.getRecordReader(splits[i], job, Reporter.NULL);
       NullWritable key = rr.createKey();
       OrcStruct value = rr.createValue();

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
index a409be8..6803abd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
@@ -37,7 +37,7 @@ import org.apache.hadoop.fs.PositionedReadable;
 import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.hive.common.io.DiskRangeList;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
+import org.apache.orc.BloomFilterIO;
 import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter.out b/ql/src/test/resources/orc-file-dump-bloomfilter.out
index 7c3db78..1654e33 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
 Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter2.out b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
index a4f006b..1f6e046 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -1,5 +1,5 @@
 Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
index 8ad856d..64cf0e9 100644
--- a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -1,5 +1,5 @@
 Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json
index 25fd63b..0376d8a 100644
--- a/ql/src/test/resources/orc-file-dump.json
+++ b/ql/src/test/resources/orc-file-dump.json
@@ -1,7 +1,7 @@
 {
   "fileName": "TestFileDump.testDump.orc",
   "fileVersion": "0.12",
-  "writerVersion": "HIVE_4243",
+  "writerVersion": "HIVE_12055",
   "numberOfRows": 21000,
   "compression": "ZLIB",
   "compressionBufferSize": 4096,

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/resources/orc-file-dump.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.out b/ql/src/test/resources/orc-file-dump.out
index 5aaa0f3..57356d3 100644
--- a/ql/src/test/resources/orc-file-dump.out
+++ b/ql/src/test/resources/orc-file-dump.out
@@ -1,5 +1,5 @@
 Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/resources/orc-file-has-null.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
index 438c27c..0e915c6 100644
--- a/ql/src/test/resources/orc-file-has-null.out
+++ b/ql/src/test/resources/orc-file-has-null.out
@@ -1,5 +1,5 @@
 Structure for TestOrcFile.testHasNull.orc
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 20000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/results/clientpositive/orc_file_dump.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
index 43c38a8..4c73bac 100644
--- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
+++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
@@ -93,7 +93,7 @@ PREHOOK: Input: default@orc_ppd
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 1049
 Compression: ZLIB
 Compression size: 262144
@@ -213,7 +213,7 @@ PREHOOK: Input: default@orc_ppd
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 1049
 Compression: ZLIB
 Compression size: 262144
@@ -345,7 +345,7 @@ PREHOOK: Input: default@orc_ppd_part@ds=2015/hr=10
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 1049
 Compression: ZLIB
 Compression size: 262144

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/results/clientpositive/orc_merge10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out
index a415776..776ca9a 100644
--- a/ql/src/test/results/clientpositive/orc_merge10.q.out
+++ b/ql/src/test/results/clientpositive/orc_merge10.q.out
@@ -517,7 +517,7 @@ PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 242
 Compression: SNAPPY
 Compression size: 4096
@@ -579,7 +579,7 @@ PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 242
 Compression: SNAPPY
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/results/clientpositive/orc_merge11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out
index a7e3d47..65e3d8b 100644
--- a/ql/src/test/results/clientpositive/orc_merge11.q.out
+++ b/ql/src/test/results/clientpositive/orc_merge11.q.out
@@ -72,7 +72,7 @@ PREHOOK: Input: default@orcfile_merge1
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 50000
 Compression: ZLIB
 Compression size: 4096
@@ -133,7 +133,7 @@ ________________________________________________________________________________
 -- END ORC FILE DUMP --
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 50000
 Compression: ZLIB
 Compression size: 4096
@@ -217,7 +217,7 @@ PREHOOK: Input: default@orcfile_merge1
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 100000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/results/clientpositive/tez/orc_merge10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge10.q.out b/ql/src/test/results/clientpositive/tez/orc_merge10.q.out
index d41671a..8b6a595 100644
--- a/ql/src/test/results/clientpositive/tez/orc_merge10.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_merge10.q.out
@@ -552,7 +552,7 @@ PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 242
 Compression: SNAPPY
 Compression size: 4096
@@ -629,7 +629,7 @@ PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 242
 Compression: SNAPPY
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
index a7e3d47..65e3d8b 100644
--- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
@@ -72,7 +72,7 @@ PREHOOK: Input: default@orcfile_merge1
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 50000
 Compression: ZLIB
 Compression size: 4096
@@ -133,7 +133,7 @@ ________________________________________________________________________________
 -- END ORC FILE DUMP --
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 50000
 Compression: ZLIB
 Compression size: 4096
@@ -217,7 +217,7 @@ PREHOOK: Input: default@orcfile_merge1
 #### A masked pattern was here ####
 -- BEGIN ORC FILE DUMP --
 #### A masked pattern was here ####
-File Version: 0.12 with HIVE_4243
+File Version: 0.12 with HIVE_12055
 Rows: 100000
 Compression: ZLIB
 Compression size: 4096

http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
new file mode 100644
index 0000000..151f30d
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
@@ -0,0 +1,335 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+/**
+ * Estimation of the memory footprint of objects
+ */
+public enum JavaDataModel {
+
+  JAVA32 {
+    @Override
+    public int object() {
+      return JAVA32_OBJECT;
+    }
+
+    @Override
+    public int array() {
+      return JAVA32_ARRAY;
+    }
+
+    @Override
+    public int ref() {
+      return JAVA32_REF;
+    }
+
+    @Override
+    public int hashMap(int entry) {
+      // base  = JAVA32_OBJECT + PRIMITIVES1 * 4 + JAVA32_FIELDREF * 3 + JAVA32_ARRAY;
+      // entry = JAVA32_OBJECT + JAVA32_FIELDREF + PRIMITIVES1
+      return hashMapBase() + hashMapEntry() * entry;
+    }
+
+    @Override
+    public int hashMapBase() {
+      return 64;
+    }
+
+    @Override
+    public int hashMapEntry() {
+      return 24;
+    }
+
+    @Override
+    public int hashSet(int entry) {
+      // hashMap += JAVA32_OBJECT
+      return hashSetBase() + hashSetEntry() * entry;
+    }
+
+    @Override
+    public int hashSetBase() {
+      return 80;
+    }
+
+    @Override
+    public int hashSetEntry() {
+      return 24;
+    }
+
+    @Override
+    public int linkedHashMap(int entry) {
+      // hashMap += JAVA32_FIELDREF + PRIMITIVES1
+      // hashMap.entry += JAVA32_FIELDREF * 2
+      return 72 + 32 * entry;
+    }
+
+    @Override
+    public int linkedList(int entry) {
+      // base  = JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_FIELDREF;
+      // entry = JAVA32_OBJECT + JAVA32_FIELDREF * 2
+      return linkedListBase() + linkedListEntry() * entry;
+     }
+
+     @Override
+     public int linkedListBase() {
+       return 28;
+     }
+
+     @Override
+     public int linkedListEntry() {
+       return 24;
+     }
+
+    @Override
+    public int arrayList() {
+      // JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_ARRAY;
+      return 44;
+    }
+
+    @Override
+    public int memoryAlign() {
+      return 8;
+    }
+  }, JAVA64 {
+    @Override
+    public int object() {
+      return JAVA64_OBJECT;
+    }
+
+    @Override
+    public int array() {
+      return JAVA64_ARRAY;
+    }
+
+    @Override
+    public int ref() {
+      return JAVA64_REF;
+    }
+
+    @Override
+    public int hashMap(int entry) {
+      // base  = JAVA64_OBJECT + PRIMITIVES1 * 4 + JAVA64_FIELDREF * 3 + JAVA64_ARRAY;
+      // entry = JAVA64_OBJECT + JAVA64_FIELDREF + PRIMITIVES1
+      return hashMapBase() + hashMapEntry() * entry;
+    }
+
+    @Override
+    public int hashMapBase() {
+      return 112;
+    }
+
+
+    @Override
+    public int hashMapEntry() {
+      return 44;
+    }
+
+    @Override
+    public int hashSet(int entry) {
+      // hashMap += JAVA64_OBJECT
+      return hashSetBase() + hashSetEntry() * entry;
+     }
+
+     @Override
+     public int hashSetBase() {
+       return 144;
+     }
+
+     @Override
+     public int hashSetEntry() {
+       return 44;
+     }
+
+    @Override
+    public int linkedHashMap(int entry) {
+      // hashMap += JAVA64_FIELDREF + PRIMITIVES1
+      // hashMap.entry += JAVA64_FIELDREF * 2
+      return 128 + 60 * entry;
+    }
+
+    @Override
+    public int linkedList(int entry) {
+      // base  = JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_FIELDREF;
+      // entry = JAVA64_OBJECT + JAVA64_FIELDREF * 2
+      return linkedListBase() + linkedListEntry() * entry;
+     }
+
+     @Override
+     public int linkedListBase() {
+       return 48;
+     }
+
+     @Override
+     public int linkedListEntry() {
+       return 48;
+     }
+
+    @Override
+    public int arrayList() {
+      // JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_ARRAY;
+      return 80;
+    }
+
+    @Override
+    public int memoryAlign() {
+      return 8;
+    }
+  };
+
+  public abstract int object();
+  public abstract int array();
+  public abstract int ref();
+  public abstract int hashMap(int entry);
+  public abstract int hashMapBase();
+  public abstract int hashMapEntry();
+  public abstract int hashSetBase();
+  public abstract int hashSetEntry();
+  public abstract int hashSet(int entry);
+  public abstract int linkedHashMap(int entry);
+  public abstract int linkedListBase();
+  public abstract int linkedListEntry();
+  public abstract int linkedList(int entry);
+  public abstract int arrayList();
+  public abstract int memoryAlign();
+
+  // ascii string
+  public int lengthFor(String string) {
+    return lengthForStringOfLength(string.length());
+  }
+
+  public int lengthForRandom() {
+    // boolean + double + AtomicLong
+    return object() + primitive1() + primitive2() + object() + primitive2();
+  }
+
+  public int primitive1() {
+    return PRIMITIVES1;
+  }
+  public int primitive2() {
+    return PRIMITIVES2;
+  }
+
+  public static int alignUp(int value, int align) {
+    return (value + align - 1) & ~(align - 1);
+  }
+
+  public static final int JAVA32_META = 12;
+  public static final int JAVA32_ARRAY_META = 16;
+  public static final int JAVA32_REF = 4;
+  public static final int JAVA32_OBJECT = 16;   // JAVA32_META + JAVA32_REF
+  public static final int JAVA32_ARRAY = 20;    // JAVA32_ARRAY_META + JAVA32_REF
+
+  public static final int JAVA64_META = 24;
+  public static final int JAVA64_ARRAY_META = 32;
+  public static final int JAVA64_REF = 8;
+  public static final int JAVA64_OBJECT = 32;   // JAVA64_META + JAVA64_REF
+  public static final int JAVA64_ARRAY = 40;    // JAVA64_ARRAY_META + JAVA64_REF
+
+  public static final int PRIMITIVES1 = 4;      // void, boolean, byte, short, int, float
+  public static final int PRIMITIVES2 = 8;      // long, double
+
+  public static final int PRIMITIVE_BYTE = 1;    // byte
+
+  private static JavaDataModel current;
+
+  public static JavaDataModel get() {
+    if (current != null) {
+      return current;
+    }
+    try {
+      String props = System.getProperty("sun.arch.data.model");
+      if ("32".equals(props)) {
+        return current = JAVA32;
+      }
+    } catch (Exception e) {
+      // ignore
+    }
+    // TODO: separate model is needed for compressedOops, which can be guessed from memory size.
+    return current = JAVA64;
+  }
+
+  public static int round(int size) {
+    JavaDataModel model = get();
+    if (model == JAVA32 || size % 8 == 0) {
+      return size;
+    }
+    return ((size + 8) >> 3) << 3;
+  }
+
+  private int lengthForPrimitiveArrayOfSize(int primitiveSize, int length) {
+    return alignUp(array() + primitiveSize*length, memoryAlign());
+  }
+
+  public int lengthForByteArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
+  }
+  public int lengthForObjectArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(ref(), length);
+  }
+  public int lengthForLongArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(primitive2(), length);
+  }
+  public int lengthForDoubleArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(primitive2(), length);
+  }
+  public int lengthForIntArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(primitive1(), length);
+  }
+  public int lengthForBooleanArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
+  }
+  public int lengthForTimestampArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length);
+  }
+  public int lengthForDateArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(lengthOfDate(), length);
+  }
+  public int lengthForDecimalArrayOfSize(int length) {
+    return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length);
+  }
+
+  public int lengthOfDecimal() {
+    // object overhead + 8 bytes for intCompact + 4 bytes for precision
+    // + 4 bytes for scale + size of BigInteger
+    return object() + 2 * primitive2() + lengthOfBigInteger();
+  }
+
+  private int lengthOfBigInteger() {
+    // object overhead + 4 bytes for bitCount + 4 bytes for bitLength
+    // + 4 bytes for firstNonzeroByteNum + 4 bytes for firstNonzeroIntNum +
+    // + 4 bytes for lowestSetBit + 5 bytes for size of magnitude (since max precision
+    // is only 38 for HiveDecimal) + 7 bytes of padding (since java memory allocations
+    // are 8 byte aligned)
+    return object() + 4 * primitive2();
+  }
+
+  public int lengthOfTimestamp() {
+    // object overhead + 4 bytes for int (nanos) + 4 bytes of padding
+    return object() + primitive2();
+  }
+
+  public int lengthOfDate() {
+    // object overhead + 8 bytes for long (fastTime) + 16 bytes for cdate
+    return object() + 3 * primitive2();
+  }
+
+  public int lengthForStringOfLength(int strLen) {
+    return object() + primitive1() * 3 + array() + strLen;
+  }
+}

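For orientation on the relocated class, a short usage sketch (all methods are defined in the file above; the printed values depend on the data model detected at runtime):

    import org.apache.hadoop.hive.ql.util.JavaDataModel;

    public class JavaDataModelDemo {
      public static void main(String[] args) {
        JavaDataModel model = JavaDataModel.get();      // JAVA32 or JAVA64, cached
        int str = model.lengthFor("orc");               // object + 3 ints + array + bytes
        int arr = model.lengthForLongArrayOfSize(16);   // array header + 16 * 8, aligned
        int up  = JavaDataModel.alignUp(13, 8);         // round up to alignment -> 16
        System.out.println(str + " " + arr + " " + up);
      }
    }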
http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
new file mode 100644
index 0000000..bb0b8f2
--- /dev/null
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
@@ -0,0 +1,309 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+/**
+ * BloomFilter is a probabilistic data structure for set membership checks. BloomFilters are
+ * highly space efficient compared to a HashSet. Because of their probabilistic nature,
+ * false positives (test() returns true for an element that was never added) are possible,
+ * but false negatives are not (if an element was added, test() will never return false
+ * for it). The false positive probability is configurable (default: 5%); the lower the
+ * false positive probability, the greater the space requirement.
+ * Bloom filters are sensitive to the number of elements that will be inserted, so the
+ * expected number of entries must be specified when the filter is created. If the number
+ * of insertions exceeds the specified number of entries, the false positive probability
+ * will increase accordingly.
+ *
+ * Internally, this implementation uses the Murmur3 fast non-cryptographic hash
+ * algorithm. Although Murmur2 is slightly faster than Murmur3 in Java, it suffers from
+ * hash collisions for specific sequences of repeating bytes. See the following link for
+ * more info:
+ * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw
+ */
+public class BloomFilter {
+  public static final double DEFAULT_FPP = 0.05;
+  protected BitSet bitSet;
+  protected int numBits;
+  protected int numHashFunctions;
+
+  public BloomFilter() {
+  }
+
+  public BloomFilter(long expectedEntries) {
+    this(expectedEntries, DEFAULT_FPP);
+  }
+
+  public BloomFilter(long expectedEntries, double fpp) {
+    checkArgument(expectedEntries > 0, "expectedEntries should be > 0");
+    checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");
+    int nb = optimalNumOfBits(expectedEntries, fpp);
+    // make 'm' multiple of 64
+    this.numBits = nb + (Long.SIZE - (nb % Long.SIZE));
+    this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, numBits);
+    this.bitSet = new BitSet(numBits);
+  }
+
+  /**
+   * A constructor to support rebuilding the BloomFilter from a serialized representation.
+   * @param bits
+   * @param numBits
+   * @param numFuncs
+   */
+  public BloomFilter(List<Long> bits, int numBits, int numFuncs) {
+    super();
+    long[] copied = new long[bits.size()];
+    for (int i = 0; i < bits.size(); i++) copied[i] = bits.get(i);
+    bitSet = new BitSet(copied);
+    this.numBits = numBits;
+    numHashFunctions = numFuncs;
+  }
+
+  static int optimalNumOfHashFunctions(long n, long m) {
+    return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
+  }
+
+  static int optimalNumOfBits(long n, double p) {
+    return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
+  }
+
+  public void add(byte[] val) {
+    if (val == null) {
+      addBytes(val, -1, -1);
+    } else {
+      addBytes(val, 0, val.length);
+    }
+  }
+
+  public void addBytes(byte[] val, int offset, int length) {
+    // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter"
+    // by Kirsch et al. From the abstract: 'only two hash functions are necessary to effectively
+    // implement a Bloom filter without any loss in the asymptotic false positive probability'
+
+    // Let's split the 64-bit hashcode into two 32-bit hash codes and employ the technique
+    // mentioned in the above paper
+    long hash64 = val == null ? Murmur3.NULL_HASHCODE :
+        Murmur3.hash64(val, offset, length);
+    addHash(hash64);
+  }
+
+  private void addHash(long hash64) {
+    int hash1 = (int) hash64;
+    int hash2 = (int) (hash64 >>> 32);
+
+    for (int i = 1; i <= numHashFunctions; i++) {
+      int combinedHash = hash1 + (i * hash2);
+      // hashcode should be positive, flip all the bits if it's negative
+      if (combinedHash < 0) {
+        combinedHash = ~combinedHash;
+      }
+      int pos = combinedHash % numBits;
+      bitSet.set(pos);
+    }
+  }
+
+  public void addString(String val) {
+    if (val == null) {
+      add(null);
+    } else {
+      add(val.getBytes());
+    }
+  }
+
+  public void addLong(long val) {
+    addHash(getLongHash(val));
+  }
+
+  public void addDouble(double val) {
+    addLong(Double.doubleToLongBits(val));
+  }
+
+  public boolean test(byte[] val) {
+    if (val == null) {
+      return testBytes(val, -1, -1);
+    }
+    return testBytes(val, 0, val.length);
+  }
+
+  public boolean testBytes(byte[] val, int offset, int length) {
+    long hash64 = val == null ? Murmur3.NULL_HASHCODE :
+        Murmur3.hash64(val, offset, length);
+    return testHash(hash64);
+  }
+
+  private boolean testHash(long hash64) {
+    int hash1 = (int) hash64;
+    int hash2 = (int) (hash64 >>> 32);
+
+    for (int i = 1; i <= numHashFunctions; i++) {
+      int combinedHash = hash1 + (i * hash2);
+      // hashcode should be positive, flip all the bits if it's negative
+      if (combinedHash < 0) {
+        combinedHash = ~combinedHash;
+      }
+      int pos = combinedHash % numBits;
+      if (!bitSet.get(pos)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  public boolean testString(String val) {
+    if (val == null) {
+      return test(null);
+    } else {
+      return test(val.getBytes());
+    }
+  }
+
+  public boolean testLong(long val) {
+    return testHash(getLongHash(val));
+  }
+
+  // Thomas Wang's integer hash function
+  // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
+  private long getLongHash(long key) {
+    key = (~key) + (key << 21); // key = (key << 21) - key - 1;
+    key = key ^ (key >> 24);
+    key = (key + (key << 3)) + (key << 8); // key * 265
+    key = key ^ (key >> 14);
+    key = (key + (key << 2)) + (key << 4); // key * 21
+    key = key ^ (key >> 28);
+    key = key + (key << 31);
+    return key;
+  }
+
+  public boolean testDouble(double val) {
+    return testLong(Double.doubleToLongBits(val));
+  }
+
+  public long sizeInBytes() {
+    return getBitSize() / 8;
+  }
+
+  public int getBitSize() {
+    return bitSet.getData().length * Long.SIZE;
+  }
+
+  public int getNumHashFunctions() {
+    return numHashFunctions;
+  }
+
+  public long[] getBitSet() {
+    return bitSet.getData();
+  }
+
+  @Override
+  public String toString() {
+    return "m: " + numBits + " k: " + numHashFunctions;
+  }
+
+  /**
+   * Merge the specified bloom filter with current bloom filter.
+   *
+   * @param that - bloom filter to merge
+   */
+  public void merge(BloomFilter that) {
+    if (this != that && this.numBits == that.numBits && this.numHashFunctions == that.numHashFunctions) {
+      this.bitSet.putAll(that.bitSet);
+    } else {
+      throw new IllegalArgumentException("BloomFilters are not compatible for merging." +
+          " this - " + this.toString() + " that - " + that.toString());
+    }
+  }
+
+  public void reset() {
+    this.bitSet.clear();
+  }
+
+  /**
+   * Bare metal bit set implementation. For performance reasons, this implementation does not check
+   * for index bounds nor expand the bit set size if the specified index is greater than the size.
+   */
+  public class BitSet {
+    private final long[] data;
+
+    public BitSet(long bits) {
+      this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]);
+    }
+
+    /**
+     * Deserialize long array as bit set.
+     *
+     * @param data - bit array
+     */
+    public BitSet(long[] data) {
+      assert data.length > 0 : "data length is zero!";
+      this.data = data;
+    }
+
+    /**
+     * Sets the bit at specified index.
+     *
+     * @param index - position
+     */
+    public void set(int index) {
+      data[index >>> 6] |= (1L << index);
+    }
+
+    /**
+     * Returns true if the bit is set in the specified index.
+     *
+     * @param index - position
+     * @return - value at the bit position
+     */
+    public boolean get(int index) {
+      return (data[index >>> 6] & (1L << index)) != 0;
+    }
+
+    /**
+     * Number of bits
+     */
+    public long bitSize() {
+      return (long) data.length * Long.SIZE;
+    }
+
+    public long[] getData() {
+      return data;
+    }
+
+    /**
+     * Combines the two BitArrays using bitwise OR.
+     */
+    public void putAll(BitSet array) {
+      assert data.length == array.data.length :
+          "BitArrays must be of equal length (" + data.length + " != " + array.data.length + ")";
+      for (int i = 0; i < data.length; i++) {
+        data[i] |= array.data[i];
+      }
+    }
+
+    /**
+     * Clear the bit set.
+     */
+    public void clear() {
+      Arrays.fill(data, 0);
+    }
+  }
+}

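A short usage sketch for the new filter (the API is exactly what the file above defines; the entry count and false positive rate are illustrative):

    import org.apache.hive.common.util.BloomFilter;

    public class BloomFilterDemo {
      public static void main(String[] args) {
        // Size the filter for ~10,000 entries at a 1% false positive rate.
        BloomFilter bf = new BloomFilter(10000, 0.01);
        bf.addString("hive");
        bf.addLong(42L);
        System.out.println(bf.testString("hive"));   // true: no false negatives
        System.out.println(bf.testLong(7L));         // almost always false; true
                                                     // would be a false positive
      }
    }

Internally, every add/test call derives all k probe positions from one 64-bit Murmur3 value split into two 32-bit halves (hash1 + i * hash2), the two-hash-function scheme from the Kirsch et al. paper cited in addBytes().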
http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
new file mode 100644
index 0000000..88c3514
--- /dev/null
+++ b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
@@ -0,0 +1,335 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+/**
+ * Murmur3 is the successor to Murmur2 among fast non-cryptographic hash algorithms.
+ *
+ * Murmur3 32 and 128 bit variants.
+ * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94
+ * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
+ *
+ * This is public domain code with no copyrights.
+ * From the homepage of MurmurHash (https://code.google.com/p/smhasher/),
+ * "All MurmurHash versions are public domain software, and the author disclaims all copyright
+ * to their code."
+ */
+public class Murmur3 {
+  // from 64-bit linear congruential generator
+  public static final long NULL_HASHCODE = 2862933555777941757L;
+
+  // Constants for 32 bit variant
+  private static final int C1_32 = 0xcc9e2d51;
+  private static final int C2_32 = 0x1b873593;
+  private static final int R1_32 = 15;
+  private static final int R2_32 = 13;
+  private static final int M_32 = 5;
+  private static final int N_32 = 0xe6546b64;
+
+  // Constants for 128 bit variant
+  private static final long C1 = 0x87c37b91114253d5L;
+  private static final long C2 = 0x4cf5ad432745937fL;
+  private static final int R1 = 31;
+  private static final int R2 = 27;
+  private static final int R3 = 33;
+  private static final int M = 5;
+  private static final int N1 = 0x52dce729;
+  private static final int N2 = 0x38495ab5;
+
+  private static final int DEFAULT_SEED = 104729;
+
+  /**
+   * Murmur3 32-bit variant.
+   *
+   * @param data - input byte array
+   * @return - hashcode
+   */
+  public static int hash32(byte[] data) {
+    return hash32(data, data.length, DEFAULT_SEED);
+  }
+
+  /**
+   * Murmur3 32-bit variant.
+   *
+   * @param data   - input byte array
+   * @param length - length of array
+   * @param seed   - seed (the no-seed overloads use DEFAULT_SEED)
+   * @return - hashcode
+   */
+  public static int hash32(byte[] data, int length, int seed) {
+    int hash = seed;
+    final int nblocks = length >> 2;
+
+    // body
+    for (int i = 0; i < nblocks; i++) {
+      int i_4 = i << 2;
+      int k = (data[i_4] & 0xff)
+          | ((data[i_4 + 1] & 0xff) << 8)
+          | ((data[i_4 + 2] & 0xff) << 16)
+          | ((data[i_4 + 3] & 0xff) << 24);
+
+      // mix functions
+      k *= C1_32;
+      k = Integer.rotateLeft(k, R1_32);
+      k *= C2_32;
+      hash ^= k;
+      hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+    }
+
+    // tail
+    int idx = nblocks << 2;
+    int k1 = 0;
+    switch (length - idx) {
+      case 3:
+        k1 ^= data[idx + 2] << 16;
+      case 2:
+        k1 ^= data[idx + 1] << 8;
+      case 1:
+        k1 ^= data[idx];
+
+        // mix functions
+        k1 *= C1_32;
+        k1 = Integer.rotateLeft(k1, R1_32);
+        k1 *= C2_32;
+        hash ^= k1;
+    }
+
+    // finalization
+    hash ^= length;
+    hash ^= (hash >>> 16);
+    hash *= 0x85ebca6b;
+    hash ^= (hash >>> 13);
+    hash *= 0xc2b2ae35;
+    hash ^= (hash >>> 16);
+
+    return hash;
+  }
+
+  /**
+   * Murmur3 64-bit variant. This is essentially the MSB 8 bytes of the Murmur3 128-bit variant.
+   *
+   * @param data - input byte array
+   * @return - hashcode
+   */
+  public static long hash64(byte[] data) {
+    return hash64(data, 0, data.length, DEFAULT_SEED);
+  }
+
+  public static long hash64(byte[] data, int offset, int length) {
+    return hash64(data, offset, length, DEFAULT_SEED);
+  }
+
+  /**
+   * Murmur3 64-bit variant. This is essentially the MSB 8 bytes of the Murmur3 128-bit variant.
+   *
+   * @param data   - input byte array
+   * @param length - length of array
+   * @param seed   - seed (the no-seed overloads use DEFAULT_SEED)
+   * @return - hashcode
+   */
+  public static long hash64(byte[] data, int offset, int length, int seed) {
+    long hash = seed;
+    final int nblocks = length >> 3;
+
+    // body
+    for (int i = 0; i < nblocks; i++) {
+      final int i8 = i << 3;
+      long k = ((long) data[offset + i8] & 0xff)
+          | (((long) data[offset + i8 + 1] & 0xff) << 8)
+          | (((long) data[offset + i8 + 2] & 0xff) << 16)
+          | (((long) data[offset + i8 + 3] & 0xff) << 24)
+          | (((long) data[offset + i8 + 4] & 0xff) << 32)
+          | (((long) data[offset + i8 + 5] & 0xff) << 40)
+          | (((long) data[offset + i8 + 6] & 0xff) << 48)
+          | (((long) data[offset + i8 + 7] & 0xff) << 56);
+
+      // mix functions
+      k *= C1;
+      k = Long.rotateLeft(k, R1);
+      k *= C2;
+      hash ^= k;
+      hash = Long.rotateLeft(hash, R2) * M + N1;
+    }
+
+    // tail
+    long k1 = 0;
+    int tailStart = nblocks << 3;
+    switch (length - tailStart) {
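+      // deliberate fall-through: folds in the remaining 1-7 bytes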
+      case 7:
+        k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48;
+      case 6:
+        k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40;
+      case 5:
+        k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32;
+      case 4:
+        k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24;
+      case 3:
+        k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16;
+      case 2:
+        k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8;
+      case 1:
+        k1 ^= ((long) data[offset + tailStart] & 0xff);
+        k1 *= C1;
+        k1 = Long.rotateLeft(k1, R1);
+        k1 *= C2;
+        hash ^= k1;
+    }
+
+    // finalization
+    hash ^= length;
+    hash = fmix64(hash);
+
+    return hash;
+  }
+
+  /**
+   * Murmur3 128-bit variant.
+   *
+   * @param data - input byte array
+   * @return - hashcode (2 longs)
+   */
+  public static long[] hash128(byte[] data) {
+    return hash128(data, 0, data.length, DEFAULT_SEED);
+  }
+
+  /**
+   * Murmur3 128-bit variant.
+   *
+   * @param data   - input byte array
+   * @param offset - offset of the first byte to hash
+   * @param length - number of bytes to hash
+   * @param seed   - seed (the no-seed overload uses DEFAULT_SEED, 104729)
+   * @return - hashcode (2 longs)
+   */
+  public static long[] hash128(byte[] data, int offset, int length, int seed) {
+    long h1 = seed;
+    long h2 = seed;
+    final int nblocks = length >> 4;
+
+    // body
+    for (int i = 0; i < nblocks; i++) {
+      final int i16 = i << 4;
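+      // assemble two 8-byte blocks (k1, k2), little-endian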
+      long k1 = ((long) data[offset + i16] & 0xff)
+          | (((long) data[offset + i16 + 1] & 0xff) << 8)
+          | (((long) data[offset + i16 + 2] & 0xff) << 16)
+          | (((long) data[offset + i16 + 3] & 0xff) << 24)
+          | (((long) data[offset + i16 + 4] & 0xff) << 32)
+          | (((long) data[offset + i16 + 5] & 0xff) << 40)
+          | (((long) data[offset + i16 + 6] & 0xff) << 48)
+          | (((long) data[offset + i16 + 7] & 0xff) << 56);
+
+      long k2 = ((long) data[offset + i16 + 8] & 0xff)
+          | (((long) data[offset + i16 + 9] & 0xff) << 8)
+          | (((long) data[offset + i16 + 10] & 0xff) << 16)
+          | (((long) data[offset + i16 + 11] & 0xff) << 24)
+          | (((long) data[offset + i16 + 12] & 0xff) << 32)
+          | (((long) data[offset + i16 + 13] & 0xff) << 40)
+          | (((long) data[offset + i16 + 14] & 0xff) << 48)
+          | (((long) data[offset + i16 + 15] & 0xff) << 56);
+
+      // mix functions for k1
+      k1 *= C1;
+      k1 = Long.rotateLeft(k1, R1);
+      k1 *= C2;
+      h1 ^= k1;
+      h1 = Long.rotateLeft(h1, R2);
+      h1 += h2;
+      h1 = h1 * M + N1;
+
+      // mix functions for k2
+      k2 *= C2;
+      k2 = Long.rotateLeft(k2, R3);
+      k2 *= C1;
+      h2 ^= k2;
+      h2 = Long.rotateLeft(h2, R1);
+      h2 += h1;
+      h2 = h2 * M + N2;
+    }
+
+    // tail
+    long k1 = 0;
+    long k2 = 0;
+    int tailStart = nblocks << 4;
+    switch (length - tailStart) {
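+      // deliberate fall-through: cases 15..9 fold and mix the tail of k2,
+      // then execution falls into cases 8..1, which fold and mix the tail of k1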
+      case 15:
+        k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48;
+      case 14:
+        k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40;
+      case 13:
+        k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32;
+      case 12:
+        k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24;
+      case 11:
+        k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16;
+      case 10:
+        k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8;
+      case 9:
+        k2 ^= (long) (data[offset + tailStart + 8] & 0xff);
+        k2 *= C2;
+        k2 = Long.rotateLeft(k2, R3);
+        k2 *= C1;
+        h2 ^= k2;
+
+      case 8:
+        k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56;
+      case 7:
+        k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48;
+      case 6:
+        k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40;
+      case 5:
+        k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32;
+      case 4:
+        k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24;
+      case 3:
+        k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16;
+      case 2:
+        k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8;
+      case 1:
+        k1 ^= (long) (data[offset + tailStart] & 0xff);
+        k1 *= C1;
+        k1 = Long.rotateLeft(k1, R1);
+        k1 *= C2;
+        h1 ^= k1;
+    }
+
+    // finalization
+    h1 ^= length;
+    h2 ^= length;
+
+    h1 += h2;
+    h2 += h1;
+
+    h1 = fmix64(h1);
+    h2 = fmix64(h2);
+
+    h1 += h2;
+    h2 += h1;
+
+    return new long[]{h1, h2};
+  }
+
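+  /**
+   * Finalization mix - force all bits of a hash block to avalanche.
+   */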
+  private static long fmix64(long h) {
+    h ^= (h >>> 33);
+    h *= 0xff51afd7ed558ccdL;
+    h ^= (h >>> 33);
+    h *= 0xc4ceb9fe1a85ec53L;
+    h ^= (h >>> 33);
+    return h;
+  }
+}

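A minimal usage sketch of the API added above (Murmur3Demo and its variable
names are illustrative only, not part of this commit; the no-seed overloads
resolve to DEFAULT_SEED = 104729):

    import org.apache.hive.common.util.Murmur3;

    public class Murmur3Demo {
      public static void main(String[] args) {
        byte[] data = "hello murmur".getBytes();

        // 32-bit variant over the whole array
        int h32 = Murmur3.hash32(data);

        // 64-bit variant over an explicit offset/length window
        long h64 = Murmur3.hash64(data, 0, data.length);

        // 128-bit variant returns two longs
        long[] h128 = Murmur3.hash128(data);

        System.out.printf("h32=%08x h64=%016x h128=%016x:%016x%n",
            h32, h64, h128[0], h128[1]);
      }
    }

The two longs returned by hash128 correspond to the little-endian longs of
Guava's murmur3_128 output, which is what the tests below check (using an
explicit common seed).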
http://git-wip-us.apache.org/repos/asf/hive/blob/06e39ebe/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java b/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
new file mode 100644
index 0000000..5facc7c
--- /dev/null
+++ b/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Tests for Murmur3 variants.
+ */
+public class TestMurmur3 {
+
+  @Test
+  public void testHashCodesM3_32_string() {
+    String key = "test";
+    int seed = 123;
+    HashFunction hf = Hashing.murmur3_32(seed);
+    int hc1 = hf.hashBytes(key.getBytes()).asInt();
+    int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
+    assertEquals(hc1, hc2);
+
+    key = "testkey";
+    hc1 = hf.hashBytes(key.getBytes()).asInt();
+    hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
+    assertEquals(hc1, hc2);
+  }
+
+  @Test
+  public void testHashCodesM3_32_ints() {
+    int seed = 123;
+    Random rand = new Random(seed);
+    HashFunction hf = Hashing.murmur3_32(seed);
+    for (int i = 0; i < 1000; i++) {
+      int val = rand.nextInt();
+      byte[] data = ByteBuffer.allocate(4).putInt(val).array();
+      int hc1 = hf.hashBytes(data).asInt();
+      int hc2 = Murmur3.hash32(data, data.length, seed);
+      assertEquals(hc1, hc2);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_32_longs() {
+    int seed = 123;
+    Random rand = new Random(seed);
+    HashFunction hf = Hashing.murmur3_32(seed);
+    for (int i = 0; i < 1000; i++) {
+      long val = rand.nextLong();
+      byte[] data = ByteBuffer.allocate(8).putLong(val).array();
+      int hc1 = hf.hashBytes(data).asInt();
+      int hc2 = Murmur3.hash32(data, data.length, seed);
+      assertEquals(hc1, hc2);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_32_double() {
+    int seed = 123;
+    Random rand = new Random(seed);
+    HashFunction hf = Hashing.murmur3_32(seed);
+    for (int i = 0; i < 1000; i++) {
+      double val = rand.nextDouble();
+      byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
+      int hc1 = hf.hashBytes(data).asInt();
+      int hc2 = Murmur3.hash32(data, data.length, seed);
+      assertEquals(hc1, hc2);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_128_string() {
+    String key = "test";
+    int seed = 123;
+    HashFunction hf = Hashing.murmur3_128(seed);
+    // Guava stores the hash bytes in little-endian order
+    ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+    buf.put(hf.hashBytes(key.getBytes()).asBytes());
+    buf.flip();
+    long gl1 = buf.getLong();
+    long gl2 = buf.getLong(8);
+    long[] hc = Murmur3.hash128(key.getBytes(), 0, key.getBytes().length, seed);
+    long m1 = hc[0];
+    long m2 = hc[1];
+    assertEquals(gl1, m1);
+    assertEquals(gl2, m2);
+
+    key = "testkey128_testkey128";
+    buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+    buf.put(hf.hashBytes(key.getBytes()).asBytes());
+    buf.flip();
+    gl1 = buf.getLong();
+    gl2 = buf.getLong(8);
+    byte[] keyBytes = key.getBytes();
+    hc = Murmur3.hash128(keyBytes, 0, keyBytes.length, seed);
+    m1 = hc[0];
+    m2 = hc[1];
+    assertEquals(gl1, m1);
+    assertEquals(gl2, m2);
+
+    byte[] offsetKeyBytes = new byte[keyBytes.length + 35];
+    Arrays.fill(offsetKeyBytes, (byte) -1);
+    System.arraycopy(keyBytes, 0, offsetKeyBytes, 35, keyBytes.length);
+    hc = Murmur3.hash128(offsetKeyBytes, 35, keyBytes.length, seed);
+    assertEquals(gl1, hc[0]);
+    assertEquals(gl2, hc[1]);
+  }
+
+  @Test
+  public void testHashCodeM3_64() {
+    byte[] origin = ("It was the best of times, it was the worst of times," +
+        " it was the age of wisdom, it was the age of foolishness," +
+        " it was the epoch of belief, it was the epoch of incredulity," +
+        " it was the season of Light, it was the season of Darkness," +
+        " it was the spring of hope, it was the winter of despair," +
+        " we had everything before us, we had nothing before us," +
+        " we were all going direct to Heaven," +
+        " we were all going direct the other way.").getBytes();
+    long hash = Murmur3.hash64(origin, 0, origin.length);
+    assertEquals(305830725663368540L, hash);
+
+    byte[] originOffset = new byte[origin.length + 150];
+    Arrays.fill(originOffset, (byte) 123);
+    System.arraycopy(origin, 0, originOffset, 150, origin.length);
+    hash = Murmur3.hash64(originOffset, 150, origin.length);
+    assertEquals(305830725663368540L, hash);
+  }
+
+  @Test
+  public void testHashCodesM3_128_ints() {
+    int seed = 123;
+    Random rand = new Random(seed);
+    HashFunction hf = Hashing.murmur3_128(seed);
+    for (int i = 0; i < 1000; i++) {
+      int val = rand.nextInt();
+      byte[] data = ByteBuffer.allocate(4).putInt(val).array();
+      // Guava stores the hash bytes in little-endian order
+      ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+      buf.put(hf.hashBytes(data).asBytes());
+      buf.flip();
+      long gl1 = buf.getLong();
+      long gl2 = buf.getLong(8);
+      long[] hc = Murmur3.hash128(data, 0, data.length, seed);
+      long m1 = hc[0];
+      long m2 = hc[1];
+      assertEquals(gl1, m1);
+      assertEquals(gl2, m2);
+
+      byte[] offsetData = new byte[data.length + 50];
+      System.arraycopy(data, 0, offsetData, 50, data.length);
+      hc = Murmur3.hash128(offsetData, 50, data.length, seed);
+      assertEquals(gl1, hc[0]);
+      assertEquals(gl2, hc[1]);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_128_longs() {
+    int seed = 123;
+    Random rand = new Random(seed);
+    HashFunction hf = Hashing.murmur3_128(seed);
+    for (int i = 0; i < 1000; i++) {
+      long val = rand.nextLong();
+      byte[] data = ByteBuffer.allocate(8).putLong(val).array();
+      // Guava stores the hash bytes in little-endian order
+      ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+      buf.put(hf.hashBytes(data).asBytes());
+      buf.flip();
+      long gl1 = buf.getLong();
+      long gl2 = buf.getLong(8);
+      long[] hc = Murmur3.hash128(data, 0, data.length, seed);
+      long m1 = hc[0];
+      long m2 = hc[1];
+      assertEquals(gl1, m1);
+      assertEquals(gl2, m2);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_128_double() {
+    int seed = 123;
+    Random rand = new Random(seed);
+    HashFunction hf = Hashing.murmur3_128(seed);
+    for (int i = 0; i < 1000; i++) {
+      double val = rand.nextDouble();
+      byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
+      // Guava stores the hash bytes in little-endian order
+      ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
+      buf.put(hf.hashBytes(data).asBytes());
+      buf.flip();
+      long gl1 = buf.getLong();
+      long gl2 = buf.getLong(8);
+      long[] hc = Murmur3.hash128(data, 0, data.length, seed);
+      long m1 = hc[0];
+      long m2 = hc[1];
+      assertEquals(gl1, m1);
+      assertEquals(gl2, m2);
+    }
+  }
+}

