hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject hive git commit: HIVE-11222 : LLAP: occasional NPE in parallel queries in ORC reader (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Date Sat, 11 Jul 2015 00:43:17 GMT
Repository: hive
Updated Branches:
  refs/heads/llap b92d3dd07 -> 1972e8432


HIVE-11222 : LLAP: occasional NPE in parallel queries in ORC reader (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1972e843
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1972e843
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1972e843

Branch: refs/heads/llap
Commit: 1972e843225ca96d00ee5e8df2c1450ff301f462
Parents: b92d3dd
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Fri Jul 10 17:43:06 2015 -0700
Committer: Sergey Shelukhin <sershe@apache.org>
Committed: Fri Jul 10 17:43:06 2015 -0700

----------------------------------------------------------------------
 .../hive/llap/io/encoded/OrcEncodedDataReader.java |  4 +++-
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java    | 17 ++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1972e843/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 1e7281e..5cf0780 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -550,7 +550,9 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
           }
           // Create new key object to reuse for gets; we've used the old one to put in cache.
           stripeKey = new OrcBatchKey(fileId, 0, 0);
-        } else {
+        }
+        // We might have got an old value from cache; recheck it has indexes.
+        if (!value.hasAllIndexes(globalInc)) {
           if (DebugUtils.isTraceOrcEnabled()) {
             LlapIoImpl.LOG.info("Updating indexes in stripe " + stripeKey.stripeIx
                 + " metadata for includes: " + DebugUtils.toString(globalInc));

http://git-wip-us.apache.org/repos/asf/hive/blob/1972e843/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index f560e98..3b98562 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.llap.io.api.cache.LlapMemoryBuffer;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
 import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -705,9 +706,16 @@ public class RecordReaderImpl implements RecordReader {
       boolean hasSelected = false, hasSkipped = false;
       for (int rowGroup = 0; rowGroup < result.length; ++rowGroup) {
         for (int pred = 0; pred < leafValues.length; ++pred) {
-          if (filterColumns[pred] != -1) {
-            OrcProto.ColumnStatistics stats =
-                indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics();
+          int columnIx = filterColumns[pred];
+          if (columnIx != -1) {
+            if (indexes[columnIx] == null) {
+              throw new AssertionError("Index is not populated for " + columnIx);
+            }
+            RowIndexEntry entry = indexes[columnIx].getEntry(rowGroup);
+            if (entry == null) {
+              throw new AssertionError("RG is not populated for " + columnIx + " rg " + rowGroup);
+            }
+            OrcProto.ColumnStatistics stats = entry.getStatistics();
             OrcProto.BloomFilter bf = null;
             if (bloomFilterIndices != null && bloomFilterIndices[filterColumns[pred]] != null) {
               bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup);
@@ -715,8 +723,7 @@ public class RecordReaderImpl implements RecordReader {
             leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
             if (LOG.isTraceEnabled()) {
               LOG.trace("Stats = " + stats);
-              LOG.trace("Setting " + sargLeaves.get(pred) + " to " +
-                  leafValues[pred]);
+              LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]);
             }
           } else {
             // the column is a virtual column


Mime
View raw message