Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D477B1867C for ; Sat, 11 Jul 2015 00:43:17 +0000 (UTC) Received: (qmail 76923 invoked by uid 500); 11 Jul 2015 00:43:17 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 76882 invoked by uid 500); 11 Jul 2015 00:43:17 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 76871 invoked by uid 99); 11 Jul 2015 00:43:17 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 11 Jul 2015 00:43:17 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 6F073E03CF; Sat, 11 Jul 2015 00:43:17 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sershe@apache.org To: commits@hive.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-11222 : LLAP: occasional NPE in parallel queries in ORC reader (Sergey Shelukhin, reviewed by Prasanth Jayachandran) Date: Sat, 11 Jul 2015 00:43:17 +0000 (UTC) Repository: hive Updated Branches: refs/heads/llap b92d3dd07 -> 1972e8432 HIVE-11222 : LLAP: occasional NPE in parallel queries in ORC reader (Sergey Shelukhin, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1972e843 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1972e843 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1972e843 Branch: refs/heads/llap Commit: 1972e843225ca96d00ee5e8df2c1450ff301f462 Parents: b92d3dd Author: Sergey Shelukhin Authored: Fri Jul 10 17:43:06 2015 -0700 Committer: Sergey Shelukhin Committed: Fri Jul 10 17:43:06 2015 -0700 ---------------------------------------------------------------------- .../hive/llap/io/encoded/OrcEncodedDataReader.java | 4 +++- .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1972e843/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 1e7281e..5cf0780 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -550,7 +550,9 @@ public class OrcEncodedDataReader extends CallableWithNdc } // Create new key object to reuse for gets; we've used the old one to put in cache. stripeKey = new OrcBatchKey(fileId, 0, 0); - } else { + } + // We might have got an old value from cache; recheck it has indexes. + if (!value.hasAllIndexes(globalInc)) { if (DebugUtils.isTraceOrcEnabled()) { LlapIoImpl.LOG.info("Updating indexes in stripe " + stripeKey.stripeIx + " metadata for includes: " + DebugUtils.toString(globalInc)); http://git-wip-us.apache.org/repos/asf/hive/blob/1972e843/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index f560e98..3b98562 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.llap.io.api.cache.LlapMemoryBuffer; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry; import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; @@ -705,9 +706,16 @@ public class RecordReaderImpl implements RecordReader { boolean hasSelected = false, hasSkipped = false; for (int rowGroup = 0; rowGroup < result.length; ++rowGroup) { for (int pred = 0; pred < leafValues.length; ++pred) { - if (filterColumns[pred] != -1) { - OrcProto.ColumnStatistics stats = - indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics(); + int columnIx = filterColumns[pred]; + if (columnIx != -1) { + if (indexes[columnIx] == null) { + throw new AssertionError("Index is not populated for " + columnIx); + } + RowIndexEntry entry = indexes[columnIx].getEntry(rowGroup); + if (entry == null) { + throw new AssertionError("RG is not populated for " + columnIx + " rg " + rowGroup); + } + OrcProto.ColumnStatistics stats = entry.getStatistics(); OrcProto.BloomFilter bf = null; if (bloomFilterIndices != null && bloomFilterIndices[filterColumns[pred]] != null) { bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup); @@ -715,8 +723,7 @@ public class RecordReaderImpl implements RecordReader { leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf); if (LOG.isTraceEnabled()) { LOG.trace("Stats = " + stats); - LOG.trace("Setting " + sargLeaves.get(pred) + " to " + - leafValues[pred]); + LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]); } } else { // the column is a virtual column