hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject [2/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)
Date Sat, 20 Aug 2016 08:41:29 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
new file mode 100644
index 0000000..3f02eb3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
@@ -0,0 +1,718 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastRowHashMap.VerifyFastRowHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+/*
+ * Tests for the long, string, and multi-key hash maps optimized for vector map join.
+ *
+ * Keys and values are serialized to bytes before being put into the map under test.
+ */
+public class TestVectorMapJoinFastRowHashMap extends CommonFastHashTable {
+
+  private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows,
+      VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType,
+      VerifyFastRowHashMap verifyTable, String[] keyTypeNames,
+      boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
+
+    final int keyCount = keyTypeNames.length;
+    PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
+    PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
+    ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList =
+        new ArrayList<ObjectInspector>(keyCount);
+
+    for (int i = 0; i < keyCount; i++) {
+      PrimitiveTypeInfo primitiveTypeInfo =
+          (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
+      keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
+      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+      keyPrimitiveCategories[i] = primitiveCategory;
+      keyPrimitiveObjectInspectorList.add(
+          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
+    }
+
+    boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
+    Arrays.fill(keyColumnSortOrderIsDesc, false);
+    byte[] keyColumnNullMarker = new byte[keyCount];
+    Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
+    byte[] keyColumnNotNullMarker = new byte[keyCount];
+    Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
+
+    BinarySortableSerializeWrite keySerializeWrite =
+        new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc,
+            keyColumnNullMarker, keyColumnNotNullMarker);
+
+
+    PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
+    final int columnCount = valuePrimitiveTypeInfos.length;
+
+    SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
+
+    final int count = rows.length;
+    for (int i = 0; i < count; i++) {
+
+      Object[] valueRow = rows[i];
+
+      Output valueOutput = new Output();
+        ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
+
+      for (int index = 0; index < columnCount; index++) {
+
+        Writable writable = (Writable) valueRow[index];
+
+        VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
+      }
+
+      byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
+
+      // Add a new key or add a value to an existing key?
+      byte[] key;
+      if (random.nextBoolean() || verifyTable.getCount() == 0) {
+        Object[] keyRow =
+            VectorRandomRowSource.randomRow(keyCount, random, keyPrimitiveObjectInspectorList,
+                keyPrimitiveCategories, keyPrimitiveTypeInfos);
+
+        Output keyOutput = new Output();
+        keySerializeWrite.set(keyOutput);
+
+        for (int index = 0; index < keyCount; index++) {
+
+          Writable writable = (Writable) keyRow[index];
+
+          VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
+        }
+
+        key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
+
+        verifyTable.add(key, keyRow, value, valueRow);
+      } else {
+        key = verifyTable.addRandomExisting(value, valueRow, random);
+      }
+
+      // Wrap the already serialized key and value bytes and put the row into the map.
+      BytesWritable keyWritable = new BytesWritable(key);
+      BytesWritable valueWritable = new BytesWritable(value);
+      map.putRow(keyWritable, valueWritable);
+      // verifyTable.verify(map);
+    }
+    verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos,
+        doClipping, useExactBytes, random);
+  }
+
+  @Test
+  public void testBigIntRows() throws Exception {
+    random = new Random(927337);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testIntRows() throws Exception {
+    random = new Random(927337);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testStringRows() throws Exception {
+    random = new Random(927337);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRows1() throws Exception {
+    random = new Random(833);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "int", "int" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRows2() throws Exception {
+    random = new Random(833099);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "string", "string" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRows3() throws Exception {
+    random = new Random(833099);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "bigint", "timestamp", "double" },
+        /* doClipping */ false, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testBigIntRowsClipped() throws Exception {
+    random = new Random(326232);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testIntRowsClipped() throws Exception {
+    random = new Random(326232);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testStringRowsClipped() throws Exception {
+    random = new Random(326232);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRowsClipped1() throws Exception {
+    random = new Random(2331);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "varchar(20)", "date", "interval_day_time" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRowsClipped2() throws Exception {
+    random = new Random(7403);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "varchar(20)", "varchar(40)" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+  @Test
+  public void testMultiKeyRowsClipped3() throws Exception {
+    random = new Random(99);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "float", "tinyint" },
+        /* doClipping */ true, /* useExactBytes */ false);
+  }
+
+
+  @Test
+  public void testBigIntRowsExact() throws Exception {
+    random = new Random(27722);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testIntRowsExact() throws Exception {
+    random = new Random(8238383);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testStringRowsExact() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsExact1() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "string", "string", "string", "string" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsExact2() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "smallint" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsExact3() throws Exception {
+    random = new Random(8235);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "int", "binary" },
+        /* doClipping */ false, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testBigIntRowsClippedExact() throws Exception {
+    random = new Random(2122);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.LONG,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.LONG, verifyTable,
+        new String[] { "bigint" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testIntRowsClippedExact() throws Exception {
+    random = new Random(7520);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastLongHashMap map =
+        new VectorMapJoinFastLongHashMap(
+            false, false, HashTableKeyType.INT,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.INT, verifyTable,
+        new String[] { "int" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testStringRowsClippedExact() throws Exception {
+    random = new Random(7539);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastStringHashMap map =
+        new VectorMapJoinFastStringHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.STRING, verifyTable,
+        new String[] { "string" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsClippedExact1() throws Exception {
+    random = new Random(13);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "interval_year_month", "decimal(12,8)" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsClippedExact2() throws Exception {
+    random = new Random(12);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "bigint", "string", "int" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+
+  @Test
+  public void testMultiKeyRowsClippedExact3() throws Exception {
+    random = new Random(7);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,
+            LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
+
+    VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+
+    VectorRandomRowSource valueSource = new VectorRandomRowSource();
+    valueSource.init(random);
+
+    int rowCount = 10000;
+    Object[][] rows = valueSource.randomRows(rowCount);
+
+    addAndVerifyRows(valueSource, rows,
+        map, HashTableKeyType.MULTI_KEY, verifyTable,
+        new String[] { "bigint", "string", "varchar(5000)" },
+        /* doClipping */ true, /* useExactBytes */ true);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
new file mode 100644
index 0000000..118e9e2
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java
@@ -0,0 +1,397 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
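+/**
+ * Test helper that writes primitive Writable values through SerializeWrite and verifies
+ * fields read back through DeserializeRead (copy of VerifyFast from serde).
+ */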
+public class VerifyFastRow {
+
+  /**
+   * Reads the next field from {@code deserializeRead} and fails the test (through
+   * {@code TestCase.fail}) unless it matches the expected {@code writable}.
+   *
+   * @param deserializeRead reader to take the next field from
+   * @param primitiveTypeInfo type of the field; its primitive category selects the comparison
+   * @param writable expected value, or null when the field is expected to read as NULL
+   * @throws IOException declared for the {@code readCheckNull()} call on the reader
+   */
+  public static void verifyDeserializeRead(DeserializeRead deserializeRead,
+      PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+    if (deserializeRead.readCheckNull()) {
+      if (writable != null) {
+        TestCase.fail("Field reports null but object is not null");
+      }
+      return;
+    } else if (writable == null) {
+      TestCase.fail("Field report not null but object is null");
+    }
+    switch (primitiveTypeInfo.getPrimitiveCategory()) {
+    case BOOLEAN:
+      {
+        boolean value = deserializeRead.currentBoolean;
+        if (!(writable instanceof BooleanWritable)) {
+          TestCase.fail("Boolean expected writable not Boolean");
+        }
+        boolean expected = ((BooleanWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case BYTE:
+      {
+        byte value = deserializeRead.currentByte;
+        if (!(writable instanceof ByteWritable)) {
+          TestCase.fail("Byte expected writable not Byte");
+        }
+        byte expected = ((ByteWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
+        }
+      }
+      break;
+    case SHORT:
+      {
+        short value = deserializeRead.currentShort;
+        if (!(writable instanceof ShortWritable)) {
+          TestCase.fail("Short expected writable not Short");
+        }
+        short expected = ((ShortWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case INT:
+      {
+        int value = deserializeRead.currentInt;
+        if (!(writable instanceof IntWritable)) {
+          TestCase.fail("Integer expected writable not Integer");
+        }
+        int expected = ((IntWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case LONG:
+      {
+        long value = deserializeRead.currentLong;
+        if (!(writable instanceof LongWritable)) {
+          TestCase.fail("Long expected writable not Long");
+        }
+        long expected = ((LongWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case FLOAT:
+      {
+        float value = deserializeRead.currentFloat;
+        if (!(writable instanceof FloatWritable)) {
+          TestCase.fail("Float expected writable not Float");
+        }
+        float expected = ((FloatWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case DOUBLE:
+      {
+        double value = deserializeRead.currentDouble;
+        if (!(writable instanceof DoubleWritable)) {
+          TestCase.fail("Double expected writable not Double");
+        }
+        double expected = ((DoubleWritable) writable).get();
+        if (value != expected) {
+          TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
+        }
+      }
+      break;
+    case STRING:
+      {
+        byte[] stringBytes = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        Text text = new Text(stringBytes);
+        String string = text.toString();
+        String expected = ((Text) writable).toString();
+        if (!string.equals(expected)) {
+          TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
+        }
+      }
+      break;
+    case CHAR:
+      {
+        byte[] stringBytes = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        Text text = new Text(stringBytes);
+        String string = text.toString();
+
+        HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
+
+        HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
+        if (!hiveChar.equals(expected)) {
+          TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
+        }
+      }
+      break;
+    case VARCHAR:
+      {
+        byte[] stringBytes = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        Text text = new Text(stringBytes);
+        String string = text.toString();
+
+        HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
+
+        HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
+        if (!hiveVarchar.equals(expected)) {
+          TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
+        }
+      }
+      break;
+    case DECIMAL:
+      {
+        HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
+        if (value == null) {
+          TestCase.fail("Decimal field evaluated to NULL");
+        }
+        HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
+        if (!value.equals(expected)) {
+          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
+          int precision = decimalTypeInfo.getPrecision();
+          int scale = decimalTypeInfo.getScale();
+          TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
+        }
+      }
+      break;
+    case DATE:
+      {
+        Date value = deserializeRead.currentDateWritable.get();
+        Date expected = ((DateWritable) writable).get();
+        if (!value.equals(expected)) {
+          TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case TIMESTAMP:
+      {
+        Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
+        Timestamp expected = ((TimestampWritable) writable).getTimestamp();
+        if (!value.equals(expected)) {
+          TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case INTERVAL_YEAR_MONTH:
+      {
+        HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
+        HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+        if (!value.equals(expected)) {
+          TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case INTERVAL_DAY_TIME:
+      {
+        HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
+        HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+        if (!value.equals(expected)) {
+          TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
+        }
+      }
+      break;
+    case BINARY:
+      {
+        byte[] byteArray = Arrays.copyOfRange(
+            deserializeRead.currentBytes,
+            deserializeRead.currentBytesStart,
+            deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
+        BytesWritable bytesWritable = (BytesWritable) writable;
+        byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
+        // Arrays.equals covers both the length check and the byte-by-byte comparison.
+        if (!Arrays.equals(byteArray, expected)) {
+          TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
+              + " found " + Arrays.toString(byteArray) + ")");
+        }
+      }
+      break;
+    default:
+      throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
+    }
+  }
+
+  /**
+   * Writes one field to {@code serializeWrite}: calls {@code writeNull()} when {@code writable}
+   * is null, otherwise unwraps the value according to the field's primitive category.
+   *
+   * @param serializeWrite writer that receives the field
+   * @param primitiveTypeInfo type of the field; also supplies the scale for DECIMAL
+   * @param writable value to write, or null to write a NULL
+   * @throws IOException declared for the {@code serializeWrite} calls
+   */
+  public static void serializeWrite(SerializeWrite serializeWrite,
+      PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
+    if (writable == null) {
+      serializeWrite.writeNull();
+      return;
+    }
+    switch (primitiveTypeInfo.getPrimitiveCategory()) {
+    case BOOLEAN:
+      {
+        serializeWrite.writeBoolean(((BooleanWritable) writable).get());
+      }
+      break;
+    case BYTE:
+      {
+        serializeWrite.writeByte(((ByteWritable) writable).get());
+      }
+      break;
+    case SHORT:
+      {
+        serializeWrite.writeShort(((ShortWritable) writable).get());
+      }
+      break;
+    case INT:
+      {
+        serializeWrite.writeInt(((IntWritable) writable).get());
+      }
+      break;
+    case LONG:
+      {
+        serializeWrite.writeLong(((LongWritable) writable).get());
+      }
+      break;
+    case FLOAT:
+      {
+        serializeWrite.writeFloat(((FloatWritable) writable).get());
+      }
+      break;
+    case DOUBLE:
+      {
+        serializeWrite.writeDouble(((DoubleWritable) writable).get());
+      }
+      break;
+    case STRING:
+      {
+        // Text.getBytes() returns the backing array, which can be longer than the valid
+        // data, so the length has to come from getLength() rather than the array length.
+        Text value = (Text) writable;
+        serializeWrite.writeString(value.getBytes(), 0, value.getLength());
+      }
+      break;
+    case CHAR:
+      {
+        HiveChar value = ((HiveCharWritable) writable).getHiveChar();
+        serializeWrite.writeHiveChar(value);
+      }
+      break;
+    case VARCHAR:
+      {
+        HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar();
+        serializeWrite.writeHiveVarchar(value);
+      }
+      break;
+    case DECIMAL:
+      {
+        HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal();
+        DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo;
+        serializeWrite.writeHiveDecimal(value, decTypeInfo.scale());
+      }
+      break;
+    case DATE:
+      {
+        Date value = ((DateWritable) writable).get();
+        serializeWrite.writeDate(value);
+      }
+      break;
+    case TIMESTAMP:
+      {
+        Timestamp value = ((TimestampWritable) writable).getTimestamp();
+        serializeWrite.writeTimestamp(value);
+      }
+      break;
+    case INTERVAL_YEAR_MONTH:
+      {
+        HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
+        serializeWrite.writeHiveIntervalYearMonth(value);
+      }
+      break;
+    case INTERVAL_DAY_TIME:
+      {
+        HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
+        serializeWrite.writeHiveIntervalDayTime(value);
+      }
+      break;
+    case BINARY:
+      {
+        BytesWritable byteWritable = (BytesWritable) writable;
+        serializeWrite.writeBinary(byteWritable.getBytes(), 0, byteWritable.getLength());
+      }
+      break;
+    default:
+      throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory().name());
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index be36ba4..003a2d4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text;
  *
  * Reading some fields require a results object to receive value information.  A separate
  * results object is created by the caller at initialization per different field even for the same
- * type. 
+ * type.
  *
  * Some type values are by reference to either bytes in the deserialization buffer or to
  * other type specific buffers.  So, those references are only valid until the next time set is
@@ -61,6 +61,8 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
   private int fieldCount;
 
   private int start;
+  private int end;
+  private int fieldStart;
 
   private byte[] tempTimestampBytes;
   private Text tempText;
@@ -68,7 +70,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
   private byte[] tempDecimalBuffer;
 
   private boolean readBeyondConfiguredFieldsWarned;
-  private boolean readBeyondBufferRangeWarned;
   private boolean bufferRangeHasExtraDataWarned;
 
   private InputByteBuffer inputByteBuffer = new InputByteBuffer();
@@ -92,7 +93,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
     }
     inputByteBuffer = new InputByteBuffer();
     readBeyondConfiguredFieldsWarned = false;
-    readBeyondBufferRangeWarned = false;
     bufferRangeHasExtraDataWarned = false;
   }
 
@@ -107,8 +107,40 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
   @Override
   public void set(byte[] bytes, int offset, int length) {
     fieldIndex = -1;
-    inputByteBuffer.reset(bytes, offset, offset + length);
     start = offset;
+    end = offset + length;
+    inputByteBuffer.reset(bytes, start, end);
+  }
+
+  /*
+   * Describe the buffer range, field types and current read position to help diagnose exceptions.
+   */
+  public String getDetailedReadPositionString() {
+    StringBuilder sb = new StringBuilder();
+
+    sb.append("Reading inputByteBuffer of length ");
+    sb.append(inputByteBuffer.getEnd());
+    sb.append(" at start offset ");
+    sb.append(start);
+    sb.append(" for length ");
+    sb.append(end - start);
+    sb.append(" to read ");
+    sb.append(fieldCount);
+    sb.append(" fields with types ");
+    sb.append(Arrays.toString(typeInfos));
+    sb.append(".  ");
+    if (fieldIndex == -1) {
+      sb.append("Before first field?");
+    } else {
+      sb.append("Read field #");
+      sb.append(fieldIndex);
+      sb.append(" at field start position ");
+      sb.append(fieldStart);
+      sb.append(" current read offset ");
+      sb.append(inputByteBuffer.tell());
+    }
+
+    return sb.toString();
   }
 
   /*
@@ -133,12 +165,11 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
     }
     if (inputByteBuffer.isEof()) {
       // Also, reading beyond our byte range produces NULL.
-      if (!readBeyondBufferRangeWarned) {
-        doReadBeyondBufferRangeWarned();
-      }
-      // We cannot read beyond so we must return NULL here.
       return true;
     }
+
+    fieldStart = inputByteBuffer.tell();
+
     byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
 
     if (isNullByte == 0) {
@@ -298,7 +329,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
           factor = -factor;
         }
 
-        int start = inputByteBuffer.tell();
+        int decimalStart = inputByteBuffer.tell();
         int length = 0;
 
         do {
@@ -317,7 +348,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
           tempDecimalBuffer = new byte[length];
         }
 
-        inputByteBuffer.seek(start);
+        inputByteBuffer.seek(decimalStart);
         for (int i = 0; i < length; ++i) {
           tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
         }
@@ -392,10 +423,6 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
     return readBeyondConfiguredFieldsWarned;
   }
   @Override
-  public boolean readBeyondBufferRangeWarned() {
-    return readBeyondBufferRangeWarned;
-  }
-  @Override
   public boolean bufferRangeHasExtraDataWarned() {
     return bufferRangeHasExtraDataWarned;
   }
@@ -410,14 +437,4 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
         + " reading more (NULLs returned).  Ignoring similar problems.");
     readBeyondConfiguredFieldsWarned = true;
   }
-
-  private void doReadBeyondBufferRangeWarned() {
-    // Warn only once.
-    int length = inputByteBuffer.tell() - start;
-    LOG.info("Reading beyond buffer range! Buffer range " +  start 
-        + " for length " + length + " but reading more... "
-        + "(total buffer length " + inputByteBuffer.getData().length + ")"
-        + "  Ignoring similar problems.");
-    readBeyondBufferRangeWarned = true;
-  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
index 2fad2af..8f3e771 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -143,10 +143,14 @@ public abstract class DeserializeRead {
    * Read integrity warning flags.
    */
   public abstract boolean readBeyondConfiguredFieldsWarned();
-  public abstract boolean readBeyondBufferRangeWarned();
   public abstract boolean bufferRangeHasExtraDataWarned();
 
   /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public abstract String getDetailedReadPositionString();
+
+  /*
    * These members hold the current value that was read when readCheckNull return false.
    */
 

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
deleted file mode 100644
index 1bb990c..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java
+++ /dev/null
@@ -1,423 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.fast;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
-  private Random r;
-
-  private int columnCount;
-
-  private List<String> typeNames;
-
-  private PrimitiveCategory[] primitiveCategories;
-
-  private PrimitiveTypeInfo[] primitiveTypeInfos;
-
-  private List<ObjectInspector> primitiveObjectInspectorList;
-
-  private StructObjectInspector rowStructObjectInspector;
-
-  public List<String> typeNames() {
-    return typeNames;
-  }
-
-  public PrimitiveCategory[] primitiveCategories() {
-    return primitiveCategories;
-  }
-
-  public PrimitiveTypeInfo[] primitiveTypeInfos() {
-    return primitiveTypeInfos;
-  }
-
-  public StructObjectInspector rowStructObjectInspector() {
-    return rowStructObjectInspector;
-  }
-
-  public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) {
-    ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList =
-        new ArrayList<ObjectInspector>(partialFieldCount);
-    List<String> columnNames = new ArrayList<String>(partialFieldCount);
-    for (int i = 0; i < partialFieldCount; i++) {
-      columnNames.add(String.format("partial%d", i));
-      partialPrimitiveObjectInspectorList.add(
-          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
-              primitiveTypeInfos[i]));
-    }
-
-    return ObjectInspectorFactory.getStandardStructObjectInspector(
-        columnNames, primitiveObjectInspectorList);
-  }
-
-  public void init(Random r) {
-    this.r = r;
-    chooseSchema();
-  }
-
-  /*
-   * For now, exclude CHAR until we determine why there is a difference (blank padding)
-   * serializing with LazyBinarySerializeWrite and the regular SerDe...
-   */
-  private static String[] possibleHiveTypeNames = {
-      "boolean",
-      "tinyint",
-      "smallint",
-      "int",
-      "bigint",
-      "date",
-      "float",
-      "double",
-      "string",
-//    "char",
-      "varchar",
-      "binary",
-      "date",
-      "timestamp",
-      "interval_year_month",
-      "interval_day_time",
-      "decimal"
-  };
-
-  private void chooseSchema() {
-    HashSet hashSet = null;
-    boolean allTypes;
-    boolean onlyOne = (r.nextInt(100) == 7);
-    if (onlyOne) {
-      columnCount = 1;
-      allTypes = false;
-    } else {
-      allTypes = r.nextBoolean();
-      if (allTypes) {
-        // One of each type.
-        columnCount = possibleHiveTypeNames.length;
-        hashSet = new HashSet<Integer>();
-      } else {
-        columnCount = 1 + r.nextInt(20);
-      }
-    }
-    typeNames = new ArrayList<String>(columnCount);
-    primitiveCategories = new PrimitiveCategory[columnCount];
-    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
-    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
-    List<String> columnNames = new ArrayList<String>(columnCount);
-    for (int c = 0; c < columnCount; c++) {
-      columnNames.add(String.format("col%d", c));
-      String typeName;
-
-      if (onlyOne) {
-        typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
-      } else {
-        int typeNum;
-        if (allTypes) {
-          while (true) {
-            typeNum = r.nextInt(possibleHiveTypeNames.length);
-            Integer typeNumInteger = new Integer(typeNum);
-            if (!hashSet.contains(typeNumInteger)) {
-              hashSet.add(typeNumInteger);
-              break;
-            }
-          }
-        } else {
-          typeNum = r.nextInt(possibleHiveTypeNames.length);
-        }
-        typeName = possibleHiveTypeNames[typeNum];
-      }
-      if (typeName.equals("char")) {
-        int maxLength = 1 + r.nextInt(100);
-        typeName = String.format("char(%d)", maxLength);
-      } else if (typeName.equals("varchar")) {
-        int maxLength = 1 + r.nextInt(100);
-        typeName = String.format("varchar(%d)", maxLength);
-      } else if (typeName.equals("decimal")) {
-        typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
-      }
-      PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
-      primitiveTypeInfos[c] = primitiveTypeInfo;
-      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
-      primitiveCategories[c] = primitiveCategory;
-      primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
-      typeNames.add(typeName);
-    }
-    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
-  }
-
-  public Object[][] randomRows(int n) {
-    Object[][] result = new Object[n][];
-    for (int i = 0; i < n; i++) {
-      result[i] = randomRow();
-    }
-    return result;
-  }
-
-  public Object[] randomRow() {
-    Object row[] = new Object[columnCount];
-    for (int c = 0; c < columnCount; c++) {
-      Object object = randomObject(c);
-      if (object == null) {
-        throw new Error("Unexpected null for column " + c);
-      }
-      row[c] = getWritableObject(c, object);
-      if (row[c] == null) {
-        throw new Error("Unexpected null for writable for column " + c);
-      }
-    }
-    return row;
-  }
-
-  public static void sort(Object[][] rows, ObjectInspector oi) {
-    for (int i = 0; i < rows.length; i++) {
-      for (int j = i + 1; j < rows.length; j++) {
-        if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) {
-          Object[] t = rows[i];
-          rows[i] = rows[j];
-          rows[j] = t;
-        }
-      }
-    }
-  }
-
-  public void sort(Object[][] rows) {
-    RandomRowObjectSource.sort(rows, rowStructObjectInspector);
-  }
-
-  public Object getWritableObject(int column, Object object) {
-    ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
-    PrimitiveCategory primitiveCategory = primitiveCategories[column];
-    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
-    switch (primitiveCategory) {
-    case BOOLEAN:
-      return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
-    case BYTE:
-      return ((WritableByteObjectInspector) objectInspector).create((byte) object);
-    case SHORT:
-      return ((WritableShortObjectInspector) objectInspector).create((short) object);
-    case INT:
-      return ((WritableIntObjectInspector) objectInspector).create((int) object);
-    case LONG:
-      return ((WritableLongObjectInspector) objectInspector).create((long) object);
-    case DATE:
-      return ((WritableDateObjectInspector) objectInspector).create((Date) object);
-    case FLOAT:
-      return ((WritableFloatObjectInspector) objectInspector).create((float) object);
-    case DOUBLE:
-      return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
-    case STRING:
-      return ((WritableStringObjectInspector) objectInspector).create((String) object);
-    case CHAR:
-      {
-        WritableHiveCharObjectInspector writableCharObjectInspector =
-                new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
-        return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
-      }
-    case VARCHAR:
-      {
-        WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
-                new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
-        return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
-      }
-    case BINARY:
-      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
-    case TIMESTAMP:
-      return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
-    case INTERVAL_YEAR_MONTH:
-      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
-    case INTERVAL_DAY_TIME:
-      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
-    case DECIMAL:
-      {
-        WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
-                new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
-        return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
-      }
-    default:
-      throw new Error("Unknown primitive category " + primitiveCategory);
-    }
-  }
-
-  public Object randomObject(int column) {
-    PrimitiveCategory primitiveCategory = primitiveCategories[column];
-    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
-    switch (primitiveCategory) {
-    case BOOLEAN:
-      return Boolean.valueOf(r.nextInt(1) == 1);
-    case BYTE:
-      return Byte.valueOf((byte) r.nextInt());
-    case SHORT:
-      return Short.valueOf((short) r.nextInt());
-    case INT:
-      return Integer.valueOf(r.nextInt());
-    case LONG:
-      return Long.valueOf(r.nextLong());
-    case DATE:
-      return RandomTypeUtil.getRandDate(r);
-    case FLOAT:
-      return Float.valueOf(r.nextFloat() * 10 - 5);
-    case DOUBLE:
-      return Double.valueOf(r.nextDouble() * 10 - 5);
-    case STRING:
-      return RandomTypeUtil.getRandString(r);
-    case CHAR:
-      return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
-    case VARCHAR:
-      return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
-    case BINARY:
-      return getRandBinary(r, 1 + r.nextInt(100));
-    case TIMESTAMP:
-      return RandomTypeUtil.getRandTimestamp(r);
-    case INTERVAL_YEAR_MONTH:
-      return getRandIntervalYearMonth(r);
-    case INTERVAL_DAY_TIME:
-      return getRandIntervalDayTime(r);
-    case DECIMAL:
-      return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
-    default:
-      throw new Error("Unknown primitive category " + primitiveCategory);
-    }
-  }
-
-  public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
-    int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
-    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
-    HiveChar hiveChar = new HiveChar(randomString, maxLength);
-    return hiveChar;
-  }
-
-  public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
-    int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
-    String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
-    HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
-    return hiveVarchar;
-  }
-
-  public static byte[] getRandBinary(Random r, int len){
-    byte[] bytes = new byte[len];
-    for (int j = 0; j < len; j++){
-      bytes[j] = Byte.valueOf((byte) r.nextInt());
-    }
-    return bytes;
-  }
-
-  private static final String DECIMAL_CHARS = "0123456789";
-
-  public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
-    while (true) {
-      StringBuilder sb = new StringBuilder();
-      int precision = 1 + r.nextInt(18);
-      int scale = 0 + r.nextInt(precision + 1);
-
-      int integerDigits = precision - scale;
-
-      if (r.nextBoolean()) {
-        sb.append("-");
-      }
-
-      if (integerDigits == 0) {
-        sb.append("0");
-      } else {
-        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits));
-      }
-      if (scale != 0) {
-        sb.append(".");
-        sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale));
-      }
-
-      HiveDecimal bd = HiveDecimal.create(sb.toString());
-      if (bd.scale() > bd.precision()) {
-        // Sometimes weird decimals are produced?
-        continue;
-      }
-
-      return bd;
-    }
-  }
-
-  public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
-    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
-    String intervalYearMonthStr = String.format("%s%d-%d",
-        yearMonthSignStr,
-        Integer.valueOf(1800 + r.nextInt(500)),  // year
-        Integer.valueOf(0 + r.nextInt(12)));     // month
-    HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
-    return intervalYearMonthVal;
-  }
-
-  public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
-    String optionalNanos = "";
-    if (r.nextInt(2) == 1) {
-      optionalNanos = String.format(".%09d",
-          Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
-    }
-    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
-    String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
-        yearMonthSignStr,
-        Integer.valueOf(1 + r.nextInt(28)),      // day
-        Integer.valueOf(0 + r.nextInt(24)),      // hour
-        Integer.valueOf(0 + r.nextInt(60)),      // minute
-        Integer.valueOf(0 + r.nextInt(60)),      // second
-        optionalNanos);
-    HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
-    return intervalDayTimeVal;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
index e562ce3..fb41420 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
@@ -96,7 +96,7 @@ public interface SerializeWrite {
 
   /*
    * STRING.
-   * 
+   *
    * Can be used to write CHAR and VARCHAR when the caller takes responsibility for
    * truncation/padding issues.
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 7e9f94e..d0f0d5e 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2.lazy.fast;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.CharacterCodingException;
 import java.sql.Date;
+import java.util.Arrays;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -72,6 +74,7 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
   private int end;
   private int fieldCount;
   private int fieldIndex;
+  private int parseFieldIndex;
   private int fieldStart;
   private int fieldLength;
 
@@ -123,6 +126,41 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
     fieldIndex = -1;
   }
 
+  /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public String getDetailedReadPositionString() {
+    StringBuffer sb = new StringBuffer();
+
+    sb.append("Reading byte[] of length ");
+    sb.append(bytes.length);
+    sb.append(" at start offset ");
+    sb.append(start);
+    sb.append(" for length ");
+    sb.append(end - start);
+    sb.append(" to read ");
+    sb.append(fieldCount);
+    sb.append(" fields with types ");
+    sb.append(Arrays.toString(typeInfos));
+    sb.append(".  ");
+    if (fieldIndex == -1) {
+      sb.append("Error during field delimitor parsing of field #");
+      sb.append(parseFieldIndex);
+    } else {
+      sb.append("Read field #");
+      sb.append(fieldIndex);
+      sb.append(" at field start position ");
+      sb.append(startPosition[fieldIndex]);
+      int currentFieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1;
+      sb.append(" for field length ");
+      sb.append(currentFieldLength);
+      sb.append(" current read offset ");
+      sb.append(offset);
+    }
+
+    return sb.toString();
+  }
+
   /**
    * Parse the byte[] and fill each field.
    *
@@ -132,27 +170,29 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
   private void parse() {
 
     int structByteEnd = end;
-    int fieldId = 0;
     int fieldByteBegin = start;
     int fieldByteEnd = start;
 
+    // Kept as a member variable to support getDetailedReadPositionString.
+    parseFieldIndex = 0;
+
     // Go through all bytes in the byte[]
     while (fieldByteEnd <= structByteEnd) {
       if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
         // Reached the end of a field?
-        if (lastColumnTakesRest && fieldId == fieldCount - 1) {
+        if (lastColumnTakesRest && parseFieldIndex == fieldCount - 1) {
           fieldByteEnd = structByteEnd;
         }
-        startPosition[fieldId] = fieldByteBegin;
-        fieldId++;
-        if (fieldId == fieldCount || fieldByteEnd == structByteEnd) {
+        startPosition[parseFieldIndex] = fieldByteBegin;
+        parseFieldIndex++;
+        if (parseFieldIndex == fieldCount || fieldByteEnd == structByteEnd) {
           // All fields have been parsed, or bytes have been parsed.
           // We need to set the startPosition of fields.length to ensure we
           // can use the same formula to calculate the length of each field.
           // For missing fields, their starting positions will all be the same,
           // which will make their lengths to be -1 and uncheckedGetField will
           // return these fields as NULLs.
-          for (int i = fieldId; i <= fieldCount; i++) {
+          for (int i = parseFieldIndex; i <= fieldCount; i++) {
             startPosition[i] = fieldByteEnd + 1;
           }
           break;
@@ -176,8 +216,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
     }
 
     // Missing fields?
-    if (!missingFieldWarned && fieldId < fieldCount) {
-      doMissingFieldWarned(fieldId);
+    if (!missingFieldWarned && parseFieldIndex < fieldCount) {
+      doMissingFieldWarned(parseFieldIndex);
     }
   }
 
@@ -511,12 +551,8 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
     return missingFieldWarned;
   }
   @Override
-  public boolean readBeyondBufferRangeWarned() {
-    return extraFieldWarned;
-  }
-  @Override
   public boolean bufferRangeHasExtraDataWarned() {
-    return false;  // UNDONE: Get rid of...
+    return false;
   }
 
   private void doExtraFieldWarned() {

http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index bbb35c7..0df1d79 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.serde2.lazybinary.fast;
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.util.Arrays;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -31,6 +33,7 @@ import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
 
 /*
  * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format.
@@ -54,6 +57,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   private int offset;
   private int end;
   private int fieldCount;
+  private int fieldStart;
   private int fieldIndex;
   private byte nullByte;
 
@@ -62,7 +66,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   private VLong tempVLong;
 
   private boolean readBeyondConfiguredFieldsWarned;
-  private boolean readBeyondBufferRangeWarned;
   private boolean bufferRangeHasExtraDataWarned;
 
   public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) {
@@ -71,7 +74,6 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
     tempVInt = new VInt();
     tempVLong = new VLong();
     readBeyondConfiguredFieldsWarned = false;
-    readBeyondBufferRangeWarned = false;
     bufferRangeHasExtraDataWarned = false;
   }
 
@@ -93,6 +95,32 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   }
 
   /*
+   * Get detailed read position information to help diagnose exceptions.
+   */
+  public String getDetailedReadPositionString() {
+    StringBuffer sb = new StringBuffer();
+
+    sb.append("Reading byte[] of length ");
+    sb.append(bytes.length);
+    sb.append(" at start offset ");
+    sb.append(start);
+    sb.append(" for length ");
+    sb.append(end - start);
+    sb.append(" to read ");
+    sb.append(fieldCount);
+    sb.append(" fields with types ");
+    sb.append(Arrays.toString(typeInfos));
+    sb.append(".  Read field #");
+    sb.append(fieldIndex);
+    sb.append(" at field start position ");
+    sb.append(fieldStart);
+    sb.append(" current read offset ");
+    sb.append(offset);
+
+    return sb.toString();
+  }
+
+  /*
    * Reads the NULL information for a field.
    *
    * @return Returns true when the field is NULL; reading is positioned to the next field.
@@ -111,11 +139,13 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       return true;
     }
 
+    fieldStart = offset;
+
     if (fieldIndex == 0) {
       // The rest of the range check for fields after the first is below after checking
       // the NULL byte.
       if (offset >= end) {
-        warnBeyondEof();
+        throw new EOFException();
       }
       nullByte = bytes[offset++];
     }
@@ -129,9 +159,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
 
       // Make sure there is at least one byte that can be read for a value.
       if (offset >= end) {
-        // Careful: since we may be dealing with NULLs in the final NULL byte, we check after
-        // the NULL byte check..
-        warnBeyondEof();
+        throw new EOFException();
       }
 
       /*
@@ -149,33 +177,33 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       case SHORT:
         // Last item -- ok to be at end.
         if (offset + 2 > end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset);
         offset += 2;
         break;
       case INT:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
         currentInt = tempVInt.value;
         break;
       case LONG:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
         offset += tempVLong.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
         currentLong = tempVLong.value;
         break;
       case FLOAT:
         // Last item -- ok to be at end.
         if (offset + 4 > end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset));
         offset += 4;
@@ -183,7 +211,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       case DOUBLE:
         // Last item -- ok to be at end.
         if (offset + 8 > end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset));
         offset += 8;
@@ -195,18 +223,19 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       case VARCHAR:
         {
           // using vint instead of 4 bytes
+          // Parse the first byte of a vint/vlong to determine the number of bytes.
+          if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+            throw new EOFException();
+          }
           LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
           offset += tempVInt.length;
-          // Could be last item for empty string -- ok to be at end.
-          if (offset > end) {
-            warnBeyondEof();
-          }
+
           int saveStart = offset;
           int length = tempVInt.value;
           offset += length;
           // Last item -- ok to be at end.
           if (offset > end) {
-            warnBeyondEof();
+            throw new EOFException();
           }
 
           currentBytes = bytes;
@@ -215,12 +244,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
         }
         break;
       case DATE:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
 
         currentDateWritable.set(tempVInt.value);
         break;
@@ -231,34 +260,37 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
           offset += length;
           // Last item -- ok to be at end.
           if (offset > end) {
-            warnBeyondEof();
+            throw new EOFException();
           }
 
           currentTimestampWritable.set(bytes, saveStart);
         }
         break;
       case INTERVAL_YEAR_MONTH:
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
+
         currentHiveIntervalYearMonthWritable.set(tempVInt.value);
         break;
       case INTERVAL_DAY_TIME:
+        // The first bounds check requires at least one more byte beyond for 2nd int (hence >=).
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+          throw new EOFException();
+        }
         LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
         offset += tempVLong.length;
-        if (offset >= end) {
-          // Overshoot or not enough for next item.
-          warnBeyondEof();
+
+        // Parse the first byte of a vint/vlong to determine the number of bytes.
+        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+          throw new EOFException();
         }
         LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
         offset += tempVInt.length;
-        // Last item -- ok to be at end.
-        if (offset > end) {
-          warnBeyondEof();
-        }
 
         currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value);
         break;
@@ -269,23 +301,27 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
 
           // These calls are to see how much data there is. The setFromBytes call below will do the same
           // readVInt reads but actually unpack the decimal.
+
+          // The first bounds check requires at least one more byte beyond for 2nd int (hence >=).
+          // Parse the first byte of a vint/vlong to determine the number of bytes.
+          if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
+            throw new EOFException();
+          }
           LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
           int saveStart = offset;
           offset += tempVInt.length;
-          if (offset >= end) {
-            // Overshoot or not enough for next item.
-            warnBeyondEof();
+
+          // Parse the first byte of a vint/vlong to determine the number of bytes.
+          if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
+            throw new EOFException();
           }
           LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
           offset += tempVInt.length;
-          if (offset >= end) {
-            // Overshoot or not enough for next item.
-            warnBeyondEof();
-          }
+
           offset += tempVInt.value;
           // Last item -- ok to be at end.
           if (offset > end) {
-            warnBeyondEof();
+            throw new EOFException();
           }
           int length = offset - saveStart;
 
@@ -327,7 +363,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
       if ((fieldIndex % 8) == 0) {
         // Get next null byte.
         if (offset >= end) {
-          warnBeyondEof();
+          throw new EOFException();
         }
         nullByte = bytes[offset++];
       }
@@ -363,23 +399,7 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
     return readBeyondConfiguredFieldsWarned;
   }
   @Override
-  public boolean readBeyondBufferRangeWarned() {
-    return readBeyondBufferRangeWarned;
-  }
-  @Override
   public boolean bufferRangeHasExtraDataWarned() {
     return bufferRangeHasExtraDataWarned;
   }
-
-  private void warnBeyondEof() throws EOFException {
-    if (!readBeyondBufferRangeWarned) {
-      // Warn only once.
-      int length = end - start;
-      LOG.info("Reading beyond buffer range! Buffer range " +  start
-          + " for length " + length + " but reading more... "
-          + "(total buffer length " + bytes.length + ")"
-          + "  Ignoring similar problems.");
-      readBeyondBufferRangeWarned = true;
-    }
-  }
 }


Mime
View raw message