Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D1E3319317 for ; Tue, 5 Apr 2016 00:33:49 +0000 (UTC) Received: (qmail 67271 invoked by uid 500); 5 Apr 2016 00:33:49 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 67226 invoked by uid 500); 5 Apr 2016 00:33:49 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 67212 invoked by uid 99); 5 Apr 2016 00:33:49 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 05 Apr 2016 00:33:49 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1B722DFC6E; Tue, 5 Apr 2016 00:33:49 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: prasanthj@apache.org To: commits@hive.apache.org Message-Id: <2e42f68a1dd54932839405293df45d2d@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-13330: ORC vectorized string dictionary reader does not differentiate null vs empty string dictionary (Prasanth Jayachandran reviewed by Matt McCline) Date: Tue, 5 Apr 2016 00:33:49 +0000 (UTC) Repository: hive Updated Branches: refs/heads/branch-2.0 bae499c91 -> 63f53069c HIVE-13330: ORC vectorized string dictionary reader does not differentiate null vs empty string dictionary (Prasanth Jayachandran reviewed by Matt McCline) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/63f53069 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/63f53069 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/63f53069 Branch: refs/heads/branch-2.0 Commit: 63f53069c438efc868de5c8f9bd8ce35aa229ae2 Parents: bae499c Author: Prasanth Jayachandran Authored: Mon Apr 4 19:33:01 2016 -0500 Committer: Prasanth Jayachandran Committed: Mon Apr 4 19:33:36 2016 -0500 ---------------------------------------------------------------------- .../hive/ql/io/orc/TreeReaderFactory.java | 20 +++++-- .../vector_orc_string_reader_empty_dict.q | 20 +++++++ .../vector_orc_string_reader_empty_dict.q.out | 62 ++++++++++++++++++++ 3 files changed, 97 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/63f53069/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java index 2c13d68..92965ff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java @@ -1679,6 +1679,7 @@ public class TreeReaderFactory { * stripe. */ public static class StringDictionaryTreeReader extends TreeReader { + private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; private DynamicByteArray dictionaryBuffer; private int[] dictionaryOffsets; protected IntegerReader reader; @@ -1858,11 +1859,20 @@ public class TreeReaderFactory { } result.isRepeating = scratchlcv.isRepeating; } else { - // Entire stripe contains null strings. - result.isRepeating = true; - result.noNulls = false; - result.isNull[0] = true; - result.setRef(0, "".getBytes(), 0, 0); + if (dictionaryOffsets == null) { + // Entire stripe contains null strings. + result.isRepeating = true; + result.noNulls = false; + result.isNull[0] = true; + result.setRef(0, EMPTY_BYTE_ARRAY, 0, 0); + } else { + // stripe contains nulls and empty strings + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + result.setRef(i, EMPTY_BYTE_ARRAY, 0, 0); + } + } + } } return result; } http://git-wip-us.apache.org/repos/asf/hive/blob/63f53069/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q b/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q new file mode 100644 index 0000000..0e8a743 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q @@ -0,0 +1,20 @@ +create table orcstr (vcol varchar(20)) stored as orc; + +insert overwrite table orcstr select null from src; + +SET hive.fetch.task.conversion=none; + +SET hive.vectorized.execution.enabled=false; +select vcol from orcstr limit 1; + +SET hive.vectorized.execution.enabled=true; +select vcol from orcstr limit 1; + +insert overwrite table orcstr select "" from src; + +SET hive.vectorized.execution.enabled=false; +select vcol from orcstr limit 1; + +SET hive.vectorized.execution.enabled=true; +select vcol from orcstr limit 1; + http://git-wip-us.apache.org/repos/asf/hive/blob/63f53069/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out b/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out new file mode 100644 index 0000000..4f00bed --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcstr +POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcstr +PREHOOK: query: insert overwrite table orcstr select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: insert overwrite table orcstr select "" from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select "" from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +