hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject hive git commit: HIVE-9711: ORC Vectorization DoubleColumnVector.isRepeating=false if all entries are NaN (Gopal V reviewed by Prasanth Jayachandran)
Date Wed, 22 Apr 2015 22:46:04 GMT
Repository: hive
Updated Branches:
  refs/heads/master 0af6cb427 -> cd596ce77


HIVE-9711: ORC Vectorization DoubleColumnVector.isRepeating=false if all entries are NaN (Gopal V reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cd596ce7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cd596ce7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cd596ce7

Branch: refs/heads/master
Commit: cd596ce77bfed0ca2fddc2de58ee932b4868fb8e
Parents: 0af6cb4
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Wed Apr 22 15:44:55 2015 -0700
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Wed Apr 22 15:44:55 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/io/orc/TreeReaderFactory.java       | 98 ++++++++++++++------
 1 file changed, 70 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cd596ce7/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index 3c2a44f..3ff6b14 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -640,7 +640,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    public Object nextVector(Object previousVector, long batchSize) throws IOException {
+    public Object nextVector(Object previousVector, final long batchSize) throws IOException {
       final DoubleColumnVector result;
       if (previousVector == null) {
         result = new DoubleColumnVector();
@@ -651,23 +651,43 @@ public class TreeReaderFactory {
       // Read present/isNull stream
       super.nextVector(result, batchSize);
 
-      // Read value entries based on isNull entries
-      for (int i = 0; i < batchSize; i++) {
-        if (!result.isNull[i]) {
-          result.vector[i] = utils.readFloat(stream);
-        } else {
+      final boolean hasNulls = !result.noNulls;
+      boolean allNulls = hasNulls;
 
-          // If the value is not present then set NaN
-          result.vector[i] = Double.NaN;
+      if (hasNulls) {
+        // conditions to ensure bounds checks skips
+        for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
+          allNulls = allNulls & result.isNull[i];
         }
-      }
-
-      // Set isRepeating flag
-      result.isRepeating = true;
-      for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) {
-        if (result.vector[i] != result.vector[i + 1]) {
+        if (allNulls) {
+          result.vector[0] = Double.NaN;
+          result.isRepeating = true;
+        } else {
+          // some nulls
           result.isRepeating = false;
+          // conditions to ensure bounds checks skips
+          for (int i = 0; batchSize <= result.isNull.length
+              && batchSize <= result.vector.length && i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.vector[i] = utils.readFloat(stream);
+            } else {
+              // If the value is not present then set NaN
+              result.vector[i] = Double.NaN;
+            }
+          }
         }
+      } else {
+        // no nulls & > 1 row (check repeating)
+        boolean repeating = (batchSize > 1);
+        final float f1 = utils.readFloat(stream);
+        result.vector[0] = f1;
+        // conditions to ensure bounds checks skips
+        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+          final float f2 = utils.readFloat(stream);
+          repeating = repeating && (f1 == f2);
+          result.vector[i] = f2;
+        }
+        result.isRepeating = repeating;
       }
       return result;
     }
@@ -733,7 +753,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    public Object nextVector(Object previousVector, long batchSize) throws IOException {
+    public Object nextVector(Object previousVector, final long batchSize) throws IOException {
       final DoubleColumnVector result;
       if (previousVector == null) {
         result = new DoubleColumnVector();
@@ -744,23 +764,45 @@ public class TreeReaderFactory {
       // Read present/isNull stream
       super.nextVector(result, batchSize);
 
-      // Read value entries based on isNull entries
-      for (int i = 0; i < batchSize; i++) {
-        if (!result.isNull[i]) {
-          result.vector[i] = utils.readDouble(stream);
-        } else {
-          // If the value is not present then set NaN
-          result.vector[i] = Double.NaN;
-        }
-      }
+      final boolean hasNulls = !result.noNulls;
+      boolean allNulls = hasNulls;
 
-      // Set isRepeating flag
-      result.isRepeating = true;
-      for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) {
-        if (result.vector[i] != result.vector[i + 1]) {
+      if (hasNulls) {
+        // conditions to ensure bounds checks skips
+        for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
+          allNulls = allNulls & result.isNull[i];
+        }
+        if (allNulls) {
+          result.vector[0] = Double.NaN;
+          result.isRepeating = true;
+        } else {
+          // some nulls
           result.isRepeating = false;
+          // conditions to ensure bounds checks skips
+          for (int i = 0; batchSize <= result.isNull.length
+              && batchSize <= result.vector.length && i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.vector[i] = utils.readDouble(stream);
+            } else {
+              // If the value is not present then set NaN
+              result.vector[i] = Double.NaN;
+            }
+          }
         }
+      } else {
+        // no nulls
+        boolean repeating = (batchSize > 1);
+        final double d1 = utils.readDouble(stream);
+        result.vector[0] = d1;
+        // conditions to ensure bounds checks skips
+        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+          final double d2 = utils.readDouble(stream);
+          repeating = repeating && (d1 == d2);
+          result.vector[i] = d2;
+        }
+        result.isRepeating = repeating;
       }
+
       return result;
     }
 


Mime
View raw message