hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject svn commit: r1487884 - in /hive/branches/vectorization: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/ se...
Date Thu, 30 May 2013 15:06:54 GMT
Author: omalley
Date: Thu May 30 15:06:54 2013
New Revision: 1487884

URL: http://svn.apache.org/r1487884
Log:
HIVE-4160 Vectorized reader support for Byte Boolean and Timestamp (Sarvesh 
Sakalanaga via omalley)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
    hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Thu May 30 15:06:54 2013
@@ -18,16 +18,20 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.sql.Timestamp;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -88,6 +92,28 @@ public class VectorizedBatchUtil {
       // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
       // float/double. String types have no default value for null.
       switch (poi.getPrimitiveCategory()) {
+      case BOOLEAN: {
+        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+        if (writableCol != null) {
+          lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
+          lcv.isNull[rowIndex] = false;
+        } else {
+          lcv.vector[rowIndex] = 1;
+          SetNullColIsNullValue(lcv, rowIndex);
+        }
+      }
+        break;
+      case BYTE: {
+        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+        if (writableCol != null) {
+          lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
+          lcv.isNull[rowIndex] = false;
+        } else {
+          lcv.vector[rowIndex] = 1;
+          SetNullColIsNullValue(lcv, rowIndex);
+        }
+      }
+        break;
       case SHORT: {
         LongColumnVector lcv = (LongColumnVector) batch.cols[i];
         if (writableCol != null) {
@@ -143,6 +169,18 @@ public class VectorizedBatchUtil {
         }
       }
         break;
+      case TIMESTAMP: {
+        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+        if (writableCol != null) {
+          Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+          lcv.vector[rowIndex] = (t.getTime() * 1000000) + (t.getNanos() % 1000000);
+          lcv.isNull[rowIndex] = false;
+        } else {
+          lcv.vector[rowIndex] = 1;
+          SetNullColIsNullValue(lcv, rowIndex);
+        }
+      }
+        break;
       case STRING: {
         BytesColumnVector bcv = (BytesColumnVector) batch.cols[i];
         if (writableCol != null) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Thu May 30 15:06:54 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.nio.ByteBuffer;
+import java.sql.Timestamp;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -28,7 +29,9 @@ import org.apache.hadoop.hive.serde2.Ser
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
 import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -121,6 +124,18 @@ public class VectorizedColumnarSerDe ext
               }
 
               switch (poi.getPrimitiveCategory()) {
+              case BOOLEAN: {
+                LongColumnVector lcv = (LongColumnVector) batch.cols[k];
+                // In vectorization true is stored as 1 and false as 0
+                boolean b = lcv.vector[rowIndex] == 1 ? true : false;
+                if (b) {
+                  serializeVectorStream.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length);
+                } else {
+                  serializeVectorStream.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length);
+                }
+              }
+                break;
+              case BYTE:
               case SHORT:
               case INT:
               case LONG:
@@ -141,6 +156,16 @@ public class VectorizedColumnarSerDe ext
                     serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
                         .getNeedsEscape());
                 break;
+              case TIMESTAMP:
+                LongColumnVector tcv = (LongColumnVector) batch.cols[k];
+                long timeInNanoSec = tcv.vector[rowIndex];
+                Timestamp t = new Timestamp(0);
+                t.setTime((timeInNanoSec)/1000000);
+                t.setNanos((int)((t.getNanos()) + (timeInNanoSec % 1000000)));
+                TimestampWritable tw = new TimestampWritable();
+                tw.set(t);
+                LazyTimestamp.writeUTF8(serializeVectorStream, tw);
+                break;
               default:
                 throw new UnsupportedOperationException(
                     "Vectorizaton is not supported for datatype:"

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Thu May 30 15:06:54 2013
@@ -197,11 +197,14 @@ public class VectorizedRowBatchCtx {
       case PRIMITIVE: {
         PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
         // Vectorization currently only supports the following data types:
-        // SHORT, INT, LONG, FLOAT, DOUBLE, STRING
+        // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and TIMESTAMP
         switch (poi.getPrimitiveCategory()) {
+        case BOOLEAN:
+        case BYTE:
         case SHORT:
         case INT:
         case LONG:
+        case TIMESTAMP:
           result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
           break;
         case FLOAT:
@@ -232,9 +235,6 @@ public class VectorizedRowBatchCtx {
     return result;
   }
 
-
-
-
   /**
    * Adds the row to the batch after deserializing the row
    *

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java Thu May 30 15:06:54 2013
@@ -20,8 +20,10 @@ package org.apache.hadoop.hive.ql.io.orc
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
 class BitFieldReader {
-  private RunLengthByteReader input;
+  private final RunLengthByteReader input;
   private final int bitSize;
   private int current;
   private int bitsLeft;
@@ -60,6 +62,23 @@ class BitFieldReader {
     return result & mask;
   }
 
+  void nextVector(LongColumnVector previous, long previousLen)
+      throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value of null for int types in vectorized
+        // processing is 1, so set that if the value is null
+        previous.vector[i] = 1;
+      }
+      if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
   void seek(PositionProvider index) throws IOException {
     input.seek(index);
     int consumed = (int) index.getNext();

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu May 30 15:06:54 2013
@@ -240,8 +240,19 @@ class RecordReaderImpl implements Record
 
     @Override
     Object nextVector(Object previousVector, long batchSize) throws IOException {
-      throw new UnsupportedOperationException(
-          "NextVector is not supported operation on Boolean type");
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
     }
   }
 
@@ -284,8 +295,19 @@ class RecordReaderImpl implements Record
 
     @Override
     Object nextVector(Object previousVector, long batchSize) throws IOException {
-      throw new UnsupportedOperationException(
-          "NextVector is not supported operation for Byte type");
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
     }
 
     @Override
@@ -708,6 +730,7 @@ class RecordReaderImpl implements Record
   private static class TimestampTreeReader extends TreeReader{
     private RunLengthIntegerReader data;
     private RunLengthIntegerReader nanos;
+    private final LongColumnVector nanoVector = new LongColumnVector();
 
     TimestampTreeReader(int columnId) {
       super(columnId);
@@ -758,8 +781,47 @@ class RecordReaderImpl implements Record
 
     @Override
     Object nextVector(Object previousVector, long batchSize) throws IOException {
-      throw new UnsupportedOperationException(
-          "NextVector is not supported operation for TimeStamp type");
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      data.nextVector(result, batchSize);
+      nanoVector.isNull = result.isNull;
+      nanos.nextVector(nanoVector, batchSize);
+
+      if (!(result.isRepeating && nanoVector.isRepeating)) {
+
+        // Non repeating values preset in the vector. Iterate thru the vector and populate the time
+        for (int i = 0; i < batchSize; i++) {
+          if (!result.isNull[i]) {
+            result.vector[i] = (result.vector[i] + WriterImpl.BASE_TIMESTAMP)
+                * WriterImpl.MILLIS_PER_SECOND;
+            nanoVector.vector[i] = parseNanos(nanoVector.vector[i]);
+            // fix the rounding when we divided by 1000.
+            if (result.vector[i] >= 0) {
+              result.vector[i] += nanoVector.vector[i] / 1000000;
+            } else {
+              result.vector[i] -= nanoVector.vector[i] / 1000000;
+            }
+            // Convert millis into nanos and add the nano vector value to it
+            result.vector[i] = (result.vector[i] * 1000000)
+                + nanoVector.vector[i];
+          }
+        }
+      } else {
+        // Entire vector has repeating values
+        if (!result.isNull[0]) {
+          result.vector[0] = (result.vector[0] * 1000000)
+              + nanoVector.vector[0];
+        }
+      }
+      return result;
     }
 
     private static int parseNanos(long serialized) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java Thu May 30 15:06:54 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.io.orc
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
 /**
  * A reader that reads a sequence of bytes. A control byte is read before
  * each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the
@@ -82,6 +84,23 @@ class RunLengthByteReader {
     return result;
   }
 
+  void nextVector(LongColumnVector previous, long previousLen)
+      throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value of null for int types in vectorized
+        // processing is 1, so set that if the value is null
+        previous.vector[i] = 1;
+      }
+      if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
   void seek(PositionProvider index) throws IOException {
     input.seek(index);
     int consumed = (int) index.getNext();

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java Thu May 30 15:06:54 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.v
 
 import java.io.File;
 import java.io.IOException;
+import java.sql.Timestamp;
+import java.util.Calendar;
 import java.util.List;
 import java.util.Properties;
 
@@ -36,13 +38,16 @@ import org.apache.hadoop.hive.serde2.Ser
 import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
 import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -82,10 +87,10 @@ public class TestVectorizedRowBatchCtx {
     // Set the configuration parameters
     tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "6");
     tbl.setProperty("columns",
-        "ashort,aint,along,adouble,afloat,astring");
+        "ashort,aint,along,adouble,afloat,astring,abyte,aboolean,atimestamp");
     tbl.setProperty("columns.types",
-        "smallint:int:bigint:double:float:string");
-    colCount = 6;
+        "smallint:int:bigint:double:float:string:tinyint:boolean:timestamp");
+    colCount = 9;
     tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
 
     try {
@@ -109,7 +114,8 @@ public class TestVectorizedRowBatchCtx {
       BytesRefArrayWritable bytes = new BytesRefArrayWritable(colCount);
       BytesRefWritable cu;
 
-      if (i % 3 != 0) {
+       if (i % 3 != 0) {
+      //if (i < 100) {
         cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length);
         bytes.set(0, cu);
 
@@ -132,6 +138,20 @@ public class TestVectorizedRowBatchCtx {
         cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0,
             ("Test string").getBytes("UTF-8").length);
         bytes.set(5, cu);
+
+        cu = new BytesRefWritable((1 + "").getBytes("UTF-8"), 0,
+            (1 + "").getBytes("UTF-8").length);
+        bytes.set(6, cu);
+
+        cu = new BytesRefWritable(("true").getBytes("UTF-8"), 0,
+            ("true").getBytes("UTF-8").length);
+        bytes.set(7, cu);
+
+        Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime());
+        cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0,
+            t.toString().getBytes("UTF-8").length);
+        bytes.set(8, cu);
+
       } else {
         cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length);
         bytes.set(0, cu);
@@ -151,6 +171,19 @@ public class TestVectorizedRowBatchCtx {
         cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0,
             ("Test string").getBytes("UTF-8").length);
         bytes.set(5, cu);
+
+        cu = new BytesRefWritable(new byte[0], 0, 0);
+        bytes.set(6, cu);
+
+        cu = new BytesRefWritable(new byte[0], 0, 0);
+        bytes.set(7, cu);
+
+//        cu = new BytesRefWritable(new byte[0], 0, 0);
+//        bytes.set(8, cu);
+        Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime());
+        cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0,
+            t.toString().getBytes("UTF-8").length);
+        bytes.set(8, cu);
       }
       writer.append(bytes);
     }
@@ -166,7 +199,7 @@ public class TestVectorizedRowBatchCtx {
         .getObjectInspector();
     List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
 
-    Assert.assertEquals("Field size should be 6", colCount, fieldRefs.size());
+    Assert.assertEquals("Field size should be 9", colCount, fieldRefs.size());
 
     // Create the context
     VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(oi, oi, serDe, null);
@@ -213,6 +246,17 @@ public class TestVectorizedRowBatchCtx {
         Object writableCol = poi.getPrimitiveWritableObject(fieldData);
         if (writableCol != null) {
           switch (poi.getPrimitiveCategory()) {
+          case BOOLEAN: {
+            LongColumnVector lcv = (LongColumnVector) batch.cols[j];
+            Assert.assertEquals(true, lcv.vector[i] == (((BooleanWritable) writableCol).get() ? 1
+                : 0));
+          }
+            break;
+          case BYTE: {
+            LongColumnVector lcv = (LongColumnVector) batch.cols[j];
+            Assert.assertEquals(true, lcv.vector[i] == (long) ((ByteWritable) writableCol).get());
+          }
+            break;
           case SHORT: {
             LongColumnVector lcv = (LongColumnVector) batch.cols[j];
             Assert.assertEquals(true, lcv.vector[i] == ((ShortWritable) writableCol).get());
@@ -247,6 +291,13 @@ public class TestVectorizedRowBatchCtx {
             Assert.assertEquals(true, a.equals(b));
           }
             break;
+          case TIMESTAMP: {
+            LongColumnVector tcv = (LongColumnVector) batch.cols[j];
+            Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+            long timeInNanoSec = (t.getTime() * 1000000) + (t.getNanos() % 1000000);
+            Assert.assertEquals(true, tcv.vector[i] == timeInNanoSec);
+          }
+            break;
           default:
             Assert.assertEquals("Unknown type", false);
           }
@@ -275,18 +326,18 @@ public class TestVectorizedRowBatchCtx {
   @Test
   public void TestCtx() throws Exception {
 
-      InitSerde();
-      WriteRCFile(this.fs, this.testFilePath, this.conf);
-      VectorizedRowBatch batch = GetRowBatch();
-      ValidateRowBatch(batch);
-
-      // Test VectorizedColumnarSerDe
-      VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe();
-      vcs.initialize(this.conf, tbl);
-      Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe
-          .getObjectInspector());
-      BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[])((ObjectWritable)w).get();
-      vcs.deserializeVector(refArray, 10, batch);
-      ValidateRowBatch(batch);
+    InitSerde();
+    WriteRCFile(this.fs, this.testFilePath, this.conf);
+    VectorizedRowBatch batch = GetRowBatch();
+    ValidateRowBatch(batch);
+
+    // Test VectorizedColumnarSerDe
+    VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe();
+    vcs.initialize(this.conf, tbl);
+    Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe
+        .getObjectInspector());
+    BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[]) ((ObjectWritable) w).get();
+    vcs.deserializeVector(refArray, 10, batch);
+    ValidateRowBatch(batch);
   }
 }

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java Thu May 30 15:06:54 2013
@@ -19,6 +19,8 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import java.io.File;
+import java.sql.Timestamp;
+import java.util.Calendar;
 import java.util.Random;
 
 import junit.framework.Assert;
@@ -29,8 +31,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -58,18 +60,24 @@ public class TestVectorizedORCReader {
   }
 
   static class MyRecord {
+    private final Boolean bo;
+    private final Byte by;
     private final Integer i;
     private final Long l;
     private final Short s;
     private final Double d;
     private final String k;
+    private final Timestamp t;
 
-    MyRecord(Integer i, Long l, Short s, Double d, String k) {
+    MyRecord(Boolean bo, Byte by, Integer i, Long l, Short s, Double d, String k, Timestamp t) {
+      this.bo = bo;
+      this.by = by;
       this.i = i;
       this.l = l;
       this.s = s;
       this.d = d;
       this.k = k;
+      this.t = t;
     }
   }
 
@@ -96,11 +104,11 @@ public class TestVectorizedORCReader {
         "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
         "way"};
     for (int i = 0; i < 21000; ++i) {
-      if ((i % 3) != 0) {
-        writer.addRow(new MyRecord(i, (long) 200, (short) (300 + i), (double) (400 + i),
-            words[r1.nextInt(words.length)]));
+      if ((i % 7) != 0) {
+        writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long) 200, (short) (300 + i), (double) (400 + i),
+            words[r1.nextInt(words.length)], new Timestamp(Calendar.getInstance().getTime().getTime())));
       } else {
-        writer.addRow(new MyRecord(i, (long) 200, null, null, null));
+        writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null));
       }
     }
     writer.close();
@@ -122,8 +130,21 @@ public class TestVectorizedORCReader {
       for (int i = 0; i < batch.size; i++) {
         row = (OrcStruct) rr.next((Object) row);
         for (int j = 0; j < batch.cols.length; j++) {
-          Object a = ((Writable) row.getFieldValue(j));
+          Object a = (row.getFieldValue(j));
           Object b = batch.cols[j].getWritableObject(i);
+          // Boolean values are stores a 1's and 0's, so convert and compare
+          if (a instanceof BooleanWritable) {
+            Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
+            Assert.assertEquals(true, temp.toString().equals(b.toString()));
+            continue;
+          }
+          // Timestamps are stored as long, so convert and compare
+          if (a instanceof Timestamp) {
+            Timestamp t = ((Timestamp) a);
+            Long timeInNanoSec = (t.getTime() * 1000000) + t.getNanos();
+            Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString()));
+            continue;
+          }
           if (null == a) {
             Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
           } else {
@@ -134,17 +155,23 @@ public class TestVectorizedORCReader {
 
       // Check repeating
       Assert.assertEquals(false, batch.cols[0].isRepeating);
-      Assert.assertEquals(true, batch.cols[1].isRepeating);
+      Assert.assertEquals(false, batch.cols[1].isRepeating);
       Assert.assertEquals(false, batch.cols[2].isRepeating);
-      Assert.assertEquals(false, batch.cols[3].isRepeating);
+      Assert.assertEquals(true, batch.cols[3].isRepeating);
       Assert.assertEquals(false, batch.cols[4].isRepeating);
+      Assert.assertEquals(false, batch.cols[5].isRepeating);
+      Assert.assertEquals(false, batch.cols[6].isRepeating);
+      Assert.assertEquals(false, batch.cols[7].isRepeating);
 
       // Check non null
-      Assert.assertEquals(true, batch.cols[0].noNulls);
-      Assert.assertEquals(true, batch.cols[1].noNulls);
-      Assert.assertEquals(false, batch.cols[2].noNulls);
-      Assert.assertEquals(false, batch.cols[3].noNulls);
+      Assert.assertEquals(false, batch.cols[0].noNulls);
+      Assert.assertEquals(false, batch.cols[1].noNulls);
+      Assert.assertEquals(true, batch.cols[2].noNulls);
+      Assert.assertEquals(true, batch.cols[3].noNulls);
       Assert.assertEquals(false, batch.cols[4].noNulls);
+      Assert.assertEquals(false, batch.cols[5].noNulls);
+      Assert.assertEquals(false, batch.cols[6].noNulls);
+      Assert.assertEquals(false, batch.cols[7].noNulls);
     }
     Assert.assertEquals(false, rr.hasNext());
   }

Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Thu May 30 15:06:54 2013
@@ -127,8 +127,8 @@ public final class LazyUtils {
     }
   }
 
-  private static byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'};
-  private static byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'};
+  public static byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'};
+  public static byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'};
 
   /**
    * Write the bytes with special characters escaped.



Mime
View raw message