Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7F52B17E34 for ; Wed, 22 Apr 2015 22:46:04 +0000 (UTC) Received: (qmail 14691 invoked by uid 500); 22 Apr 2015 22:46:04 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 14649 invoked by uid 500); 22 Apr 2015 22:46:04 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 14638 invoked by uid 99); 22 Apr 2015 22:46:04 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 22 Apr 2015 22:46:04 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 2A359E03CF; Wed, 22 Apr 2015 22:46:04 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: prasanthj@apache.org To: commits@hive.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-9711: ORC Vectorization DoubleColumnVector.isRepeating=false if all entries are NaN (Gopal V reviewed by Prasanth Jayachandran) Date: Wed, 22 Apr 2015 22:46:04 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master 0af6cb427 -> cd596ce77 HIVE-9711: ORC Vectorization DoubleColumnVector.isRepeating=false if all entries are NaN (Gopal V reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cd596ce7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cd596ce7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cd596ce7 Branch: refs/heads/master Commit: cd596ce77bfed0ca2fddc2de58ee932b4868fb8e Parents: 0af6cb4 Author: Prasanth Jayachandran Authored: Wed Apr 22 15:44:55 2015 -0700 Committer: Prasanth Jayachandran Committed: Wed Apr 22 15:44:55 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/io/orc/TreeReaderFactory.java | 98 ++++++++++++++------ 1 file changed, 70 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/cd596ce7/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java index 3c2a44f..3ff6b14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java @@ -640,7 +640,7 @@ public class TreeReaderFactory { } @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { + public Object nextVector(Object previousVector, final long batchSize) throws IOException { final DoubleColumnVector result; if (previousVector == null) { result = new DoubleColumnVector(); @@ -651,23 +651,43 @@ public class TreeReaderFactory { // Read present/isNull stream super.nextVector(result, batchSize); - // Read value entries based on isNull entries - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readFloat(stream); - } else { + final boolean hasNulls = !result.noNulls; + boolean allNulls = hasNulls; - // If the value is not present then set NaN - result.vector[i] = Double.NaN; + if (hasNulls) { + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) { + allNulls = allNulls & result.isNull[i]; } - } - - // Set isRepeating flag - result.isRepeating = true; - for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { - if (result.vector[i] != result.vector[i + 1]) { + if (allNulls) { + result.vector[0] = Double.NaN; + result.isRepeating = true; + } else { + // some nulls result.isRepeating = false; + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length + && batchSize <= result.vector.length && i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readFloat(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } + } } + } else { + // no nulls & > 1 row (check repeating) + boolean repeating = (batchSize > 1); + final float f1 = utils.readFloat(stream); + result.vector[0] = f1; + // conditions to ensure bounds checks skips + for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { + final float f2 = utils.readFloat(stream); + repeating = repeating && (f1 == f2); + result.vector[i] = f2; + } + result.isRepeating = repeating; } return result; } @@ -733,7 +753,7 @@ public class TreeReaderFactory { } @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { + public Object nextVector(Object previousVector, final long batchSize) throws IOException { final DoubleColumnVector result; if (previousVector == null) { result = new DoubleColumnVector(); @@ -744,23 +764,45 @@ public class TreeReaderFactory { // Read present/isNull stream super.nextVector(result, batchSize); - // Read value entries based on isNull entries - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readDouble(stream); - } else { - // If the value is not present then set NaN - result.vector[i] = Double.NaN; - } - } + final boolean hasNulls = !result.noNulls; + boolean allNulls = hasNulls; - // Set isRepeating flag - result.isRepeating = true; - for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { - if (result.vector[i] != result.vector[i + 1]) { + if (hasNulls) { + // conditions to ensure bounds checks skips + for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) { + allNulls = allNulls & result.isNull[i]; + } + if (allNulls) { + result.vector[0] = Double.NaN; + result.isRepeating = true; + } else { + // some nulls result.isRepeating = false; + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length + && batchSize <= result.vector.length && i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readDouble(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } + } } + } else { + // no nulls + boolean repeating = (batchSize > 1); + final double d1 = utils.readDouble(stream); + result.vector[0] = d1; + // conditions to ensure bounds checks skips + for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { + final double d2 = utils.readDouble(stream); + repeating = repeating && (d1 == d2); + result.vector[i] = d2; + } + result.isRepeating = repeating; } + return result; }