hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject svn commit: r1650699 [1/4] - in /hive/trunk/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/ test/resources/ test/re...
Date Sat, 10 Jan 2015 00:09:15 GMT
Author: prasanthj
Date: Sat Jan 10 00:09:14 2015
New Revision: 1650699

URL: http://svn.apache.org/r1650699
Log:
HIVE-4639: Add has null flag to ORC internal index (Prasanth Jayachandran reviewed by Gopal V)

Added:
    hive/trunk/ql/src/test/resources/orc-file-has-null.out
Modified:
    hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
    hive/trunk/ql/src/test/resources/orc-file-dump.out
    hive/trunk/ql/src/test/results/clientpositive/alter_merge_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/alter_merge_stats_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
    hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
    hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
    hive/trunk/ql/src/test/results/clientpositive/orc_analyze.q.out
    hive/trunk/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_stats_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out

Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Sat Jan 10 00:09:14 2015
@@ -4796,6 +4796,16 @@ public final class OrcProto {
      * <code>optional .org.apache.hadoop.hive.ql.io.orc.TimestampStatistics timestampStatistics = 9;</code>
      */
     org.apache.hadoop.hive.ql.io.orc.OrcProto.TimestampStatisticsOrBuilder getTimestampStatisticsOrBuilder();
+
+    // optional bool hasNull = 10;
+    /**
+     * <code>optional bool hasNull = 10;</code>
+     */
+    boolean hasHasNull();
+    /**
+     * <code>optional bool hasNull = 10;</code>
+     */
+    boolean getHasNull();
   }
   /**
    * Protobuf type {@code org.apache.hadoop.hive.ql.io.orc.ColumnStatistics}
@@ -4957,6 +4967,11 @@ public final class OrcProto {
               bitField0_ |= 0x00000100;
               break;
             }
+            case 80: {
+              bitField0_ |= 0x00000200;
+              hasNull_ = input.readBool();
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -5189,6 +5204,22 @@ public final class OrcProto {
       return timestampStatistics_;
     }
 
+    // optional bool hasNull = 10;
+    public static final int HASNULL_FIELD_NUMBER = 10;
+    private boolean hasNull_;
+    /**
+     * <code>optional bool hasNull = 10;</code>
+     */
+    public boolean hasHasNull() {
+      return ((bitField0_ & 0x00000200) == 0x00000200);
+    }
+    /**
+     * <code>optional bool hasNull = 10;</code>
+     */
+    public boolean getHasNull() {
+      return hasNull_;
+    }
+
     private void initFields() {
       numberOfValues_ = 0L;
       intStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance();
@@ -5199,6 +5230,7 @@ public final class OrcProto {
       dateStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DateStatistics.getDefaultInstance();
       binaryStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BinaryStatistics.getDefaultInstance();
       timestampStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.TimestampStatistics.getDefaultInstance();
+      hasNull_ = false;
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -5239,6 +5271,9 @@ public final class OrcProto {
       if (((bitField0_ & 0x00000100) == 0x00000100)) {
         output.writeMessage(9, timestampStatistics_);
       }
+      if (((bitField0_ & 0x00000200) == 0x00000200)) {
+        output.writeBool(10, hasNull_);
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -5284,6 +5319,10 @@ public final class OrcProto {
         size += com.google.protobuf.CodedOutputStream
           .computeMessageSize(9, timestampStatistics_);
       }
+      if (((bitField0_ & 0x00000200) == 0x00000200)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBoolSize(10, hasNull_);
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -5458,6 +5497,8 @@ public final class OrcProto {
           timestampStatisticsBuilder_.clear();
         }
         bitField0_ = (bitField0_ & ~0x00000100);
+        hasNull_ = false;
+        bitField0_ = (bitField0_ & ~0x00000200);
         return this;
       }
 
@@ -5554,6 +5595,10 @@ public final class OrcProto {
         } else {
           result.timestampStatistics_ = timestampStatisticsBuilder_.build();
         }
+        if (((from_bitField0_ & 0x00000200) == 0x00000200)) {
+          to_bitField0_ |= 0x00000200;
+        }
+        result.hasNull_ = hasNull_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -5597,6 +5642,9 @@ public final class OrcProto {
         if (other.hasTimestampStatistics()) {
           mergeTimestampStatistics(other.getTimestampStatistics());
         }
+        if (other.hasHasNull()) {
+          setHasNull(other.getHasNull());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -6593,6 +6641,39 @@ public final class OrcProto {
         return timestampStatisticsBuilder_;
       }
 
+      // optional bool hasNull = 10;
+      private boolean hasNull_ ;
+      /**
+       * <code>optional bool hasNull = 10;</code>
+       */
+      public boolean hasHasNull() {
+        return ((bitField0_ & 0x00000200) == 0x00000200);
+      }
+      /**
+       * <code>optional bool hasNull = 10;</code>
+       */
+      public boolean getHasNull() {
+        return hasNull_;
+      }
+      /**
+       * <code>optional bool hasNull = 10;</code>
+       */
+      public Builder setHasNull(boolean value) {
+        bitField0_ |= 0x00000200;
+        hasNull_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional bool hasNull = 10;</code>
+       */
+      public Builder clearHasNull() {
+        bitField0_ = (bitField0_ & ~0x00000200);
+        hasNull_ = false;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.ColumnStatistics)
     }
 
@@ -17657,7 +17738,7 @@ public final class OrcProto {
       "\"2\n\016DateStatistics\022\017\n\007minimum\030\001 \001(\021\022\017\n\007m",
       "aximum\030\002 \001(\021\"7\n\023TimestampStatistics\022\017\n\007m" +
       "inimum\030\001 \001(\022\022\017\n\007maximum\030\002 \001(\022\"\037\n\020BinaryS" +
-      "tatistics\022\013\n\003sum\030\001 \001(\022\"\234\005\n\020ColumnStatist" +
+      "tatistics\022\013\n\003sum\030\001 \001(\022\"\255\005\n\020ColumnStatist" +
       "ics\022\026\n\016numberOfValues\030\001 \001(\004\022J\n\rintStatis" +
       "tics\030\002 \001(\01323.org.apache.hadoop.hive.ql.i" +
       "o.orc.IntegerStatistics\022L\n\020doubleStatist" +
@@ -17674,60 +17755,60 @@ public final class OrcProto {
       "org.apache.hadoop.hive.ql.io.orc.BinaryS" +
       "tatistics\022R\n\023timestampStatistics\030\t \001(\01325" +
       ".org.apache.hadoop.hive.ql.io.orc.Timest" +
-      "ampStatistics\"n\n\rRowIndexEntry\022\025\n\tpositi",
-      "ons\030\001 \003(\004B\002\020\001\022F\n\nstatistics\030\002 \001(\01322.org." +
-      "apache.hadoop.hive.ql.io.orc.ColumnStati" +
-      "stics\"J\n\010RowIndex\022>\n\005entry\030\001 \003(\0132/.org.a" +
-      "pache.hadoop.hive.ql.io.orc.RowIndexEntr" +
-      "y\"\331\001\n\006Stream\022;\n\004kind\030\001 \002(\0162-.org.apache." +
-      "hadoop.hive.ql.io.orc.Stream.Kind\022\016\n\006col" +
-      "umn\030\002 \001(\r\022\016\n\006length\030\003 \001(\004\"r\n\004Kind\022\013\n\007PRE" +
-      "SENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LENGTH\020\002\022\023\n\017DICTIONA" +
-      "RY_DATA\020\003\022\024\n\020DICTIONARY_COUNT\020\004\022\r\n\tSECON" +
-      "DARY\020\005\022\r\n\tROW_INDEX\020\006\"\263\001\n\016ColumnEncoding",
-      "\022C\n\004kind\030\001 \002(\01625.org.apache.hadoop.hive." +
-      "ql.io.orc.ColumnEncoding.Kind\022\026\n\016diction" +
-      "arySize\030\002 \001(\r\"D\n\004Kind\022\n\n\006DIRECT\020\000\022\016\n\nDIC" +
-      "TIONARY\020\001\022\r\n\tDIRECT_V2\020\002\022\021\n\rDICTIONARY_V" +
-      "2\020\003\"\214\001\n\014StripeFooter\0229\n\007streams\030\001 \003(\0132(." +
-      "org.apache.hadoop.hive.ql.io.orc.Stream\022" +
-      "A\n\007columns\030\002 \003(\01320.org.apache.hadoop.hiv" +
-      "e.ql.io.orc.ColumnEncoding\"\370\002\n\004Type\0229\n\004k" +
-      "ind\030\001 \002(\0162+.org.apache.hadoop.hive.ql.io" +
-      ".orc.Type.Kind\022\024\n\010subtypes\030\002 \003(\rB\002\020\001\022\022\n\n",
-      "fieldNames\030\003 \003(\t\022\025\n\rmaximumLength\030\004 \001(\r\022" +
-      "\021\n\tprecision\030\005 \001(\r\022\r\n\005scale\030\006 \001(\r\"\321\001\n\004Ki" +
-      "nd\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003" +
-      "INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n" +
-      "\n\006STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n" +
-      "\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022" +
-      "\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022\013\n\007VARCHAR\020\020\022\010\n\004C" +
-      "HAR\020\021\"x\n\021StripeInformation\022\016\n\006offset\030\001 \001" +
-      "(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndataLength\030\003 " +
-      "\001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014numberOfRow",
-      "s\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004name\030\001 \002(" +
-      "\t\022\r\n\005value\030\002 \002(\014\"X\n\020StripeStatistics\022D\n\010" +
-      "colStats\030\001 \003(\01322.org.apache.hadoop.hive." +
-      "ql.io.orc.ColumnStatistics\"S\n\010Metadata\022G" +
-      "\n\013stripeStats\030\001 \003(\01322.org.apache.hadoop." +
-      "hive.ql.io.orc.StripeStatistics\"\356\002\n\006Foot" +
-      "er\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcontentLengt" +
-      "h\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org.apache.ha" +
-      "doop.hive.ql.io.orc.StripeInformation\0225\n" +
-      "\005types\030\004 \003(\0132&.org.apache.hadoop.hive.ql",
-      ".io.orc.Type\022D\n\010metadata\030\005 \003(\01322.org.apa" +
-      "che.hadoop.hive.ql.io.orc.UserMetadataIt" +
-      "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007" +
-      " \003(\01322.org.apache.hadoop.hive.ql.io.orc." +
-      "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" +
-      "\"\334\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" +
-      "\013compression\030\002 \001(\01621.org.apache.hadoop.h" +
-      "ive.ql.io.orc.CompressionKind\022\034\n\024compres" +
-      "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001" +
-      "\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\rwriterVersio",
-      "n\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKi" +
-      "nd\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZ" +
-      "O\020\003"
+      "ampStatistics\022\017\n\007hasNull\030\n \001(\010\"n\n\rRowInd",
+      "exEntry\022\025\n\tpositions\030\001 \003(\004B\002\020\001\022F\n\nstatis" +
+      "tics\030\002 \001(\01322.org.apache.hadoop.hive.ql.i" +
+      "o.orc.ColumnStatistics\"J\n\010RowIndex\022>\n\005en" +
+      "try\030\001 \003(\0132/.org.apache.hadoop.hive.ql.io" +
+      ".orc.RowIndexEntry\"\331\001\n\006Stream\022;\n\004kind\030\001 " +
+      "\002(\0162-.org.apache.hadoop.hive.ql.io.orc.S" +
+      "tream.Kind\022\016\n\006column\030\002 \001(\r\022\016\n\006length\030\003 \001" +
+      "(\004\"r\n\004Kind\022\013\n\007PRESENT\020\000\022\010\n\004DATA\020\001\022\n\n\006LEN" +
+      "GTH\020\002\022\023\n\017DICTIONARY_DATA\020\003\022\024\n\020DICTIONARY" +
+      "_COUNT\020\004\022\r\n\tSECONDARY\020\005\022\r\n\tROW_INDEX\020\006\"\263",
+      "\001\n\016ColumnEncoding\022C\n\004kind\030\001 \002(\01625.org.ap" +
+      "ache.hadoop.hive.ql.io.orc.ColumnEncodin" +
+      "g.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind\022\n" +
+      "\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V2\020" +
+      "\002\022\021\n\rDICTIONARY_V2\020\003\"\214\001\n\014StripeFooter\0229\n" +
+      "\007streams\030\001 \003(\0132(.org.apache.hadoop.hive." +
+      "ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.org." +
+      "apache.hadoop.hive.ql.io.orc.ColumnEncod" +
+      "ing\"\370\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apache." +
+      "hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010subty",
+      "pes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmax" +
+      "imumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n\005s" +
+      "cale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE" +
+      "\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOA" +
+      "T\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022" +
+      "\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STR" +
+      "UCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022" +
+      "\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInformat" +
+      "ion\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004" +
+      "\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001",
+      "(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadata" +
+      "Item\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020Str" +
+      "ipeStatistics\022D\n\010colStats\030\001 \003(\01322.org.ap" +
+      "ache.hadoop.hive.ql.io.orc.ColumnStatist" +
+      "ics\"S\n\010Metadata\022G\n\013stripeStats\030\001 \003(\01322.o" +
+      "rg.apache.hadoop.hive.ql.io.orc.StripeSt" +
+      "atistics\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 \001(" +
+      "\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007stripes\030\003 \003(" +
+      "\01323.org.apache.hadoop.hive.ql.io.orc.Str" +
+      "ipeInformation\0225\n\005types\030\004 \003(\0132&.org.apac",
+      "he.hadoop.hive.ql.io.orc.Type\022D\n\010metadat" +
+      "a\030\005 \003(\01322.org.apache.hadoop.hive.ql.io.o" +
+      "rc.UserMetadataItem\022\024\n\014numberOfRows\030\006 \001(" +
+      "\004\022F\n\nstatistics\030\007 \003(\01322.org.apache.hadoo" +
+      "p.hive.ql.io.orc.ColumnStatistics\022\026\n\016row" +
+      "IndexStride\030\010 \001(\r\"\334\001\n\nPostScript\022\024\n\014foot" +
+      "erLength\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621.or" +
+      "g.apache.hadoop.hive.ql.io.orc.Compressi" +
+      "onKind\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007" +
+      "version\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(",
+      "\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t" +
+      "*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022" +
+      "\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -17787,7 +17868,7 @@ public final class OrcProto {
           internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor,
-              new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", "DateStatistics", "BinaryStatistics", "TimestampStatistics", });
+              new java.lang.String[] { "NumberOfValues", "IntStatistics", "DoubleStatistics", "StringStatistics", "BucketStatistics", "DecimalStatistics", "DateStatistics", "BinaryStatistics", "TimestampStatistics", "HasNull", });
           internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_descriptor =
             getDescriptor().getMessageTypes().get(9);
           internal_static_org_apache_hadoop_hive_ql_io_orc_RowIndexEntry_fieldAccessorTable = new

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Sat Jan 10 00:09:14 2015
@@ -27,4 +27,10 @@ public interface ColumnStatistics {
    * @return the number of values
    */
   long getNumberOfValues();
+
+  /**
+   * Returns true if there are nulls in the scope of column statistics.
+   * @return true if null present else false
+   */
+  boolean hasNull();
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Sat Jan 10 00:09:14 2015
@@ -17,8 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
-import java.sql.Timestamp;
-
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -26,6 +24,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
+import java.sql.Timestamp;
+
 class ColumnStatisticsImpl implements ColumnStatistics {
 
   private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
@@ -816,11 +816,16 @@ class ColumnStatisticsImpl implements Co
   }
 
   private long count = 0;
+  private boolean hasNull = false;
 
   ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
     if (stats.hasNumberOfValues()) {
       count = stats.getNumberOfValues();
     }
+
+    if (stats.hasHasNull()) {
+      hasNull = stats.getHasNull();
+    }
   }
 
   ColumnStatisticsImpl() {
@@ -830,6 +835,10 @@ class ColumnStatisticsImpl implements Co
     count += 1;
   }
 
+  void setNull() {
+    hasNull = true;
+  }
+
   void updateBoolean(boolean value) {
     throw new UnsupportedOperationException("Can't update boolean");
   }
@@ -864,10 +873,12 @@ class ColumnStatisticsImpl implements Co
 
   void merge(ColumnStatisticsImpl stats) {
     count += stats.count;
+    hasNull |= stats.hasNull;
   }
 
   void reset() {
     count = 0;
+    hasNull = false;
   }
 
   @Override
@@ -876,14 +887,20 @@ class ColumnStatisticsImpl implements Co
   }
 
   @Override
+  public boolean hasNull() {
+    return hasNull;
+  }
+
+  @Override
   public String toString() {
-    return "count: " + count;
+    return "count: " + count + " hasNull: " + hasNull;
   }
 
   OrcProto.ColumnStatistics.Builder serialize() {
     OrcProto.ColumnStatistics.Builder builder =
       OrcProto.ColumnStatistics.newBuilder();
     builder.setNumberOfValues(count);
+    builder.setHasNull(hasNull);
     return builder;
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Sat Jan 10 00:09:14 2015
@@ -17,15 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.Map;
-
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
@@ -46,6 +37,14 @@ import org.apache.hadoop.io.LongWritable
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONWriter;
 
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
 /**
  * A tool for printing out the file structure of ORC files.
  */
@@ -170,10 +169,7 @@ public final class FileDump {
                 buf.append("no stats at ");
               } else {
                 ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
-                Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
-                buf.append(" count: ").append(cs.getNumberOfValues());
-                buf.append(" min: ").append(min);
-                buf.append(" max: ").append(max);
+                buf.append(cs.toString());
               }
               buf.append(" positions: ");
               for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Jan 10 00:09:14 2015
@@ -18,18 +18,9 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.NavigableMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -54,9 +45,9 @@ import org.apache.hadoop.hive.ql.io.Reco
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -74,9 +65,18 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.StringUtils;
 
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 /**
  * A MapReduce/Hive input format for ORC files.
  * <p>
@@ -919,13 +919,8 @@ public class OrcInputFormat  implements
         if (filterColumns[pred] != -1) {
 
           // column statistics at index 0 contains only the number of rows
-          ColumnStatistics stats =
-              stripeStatistics.getColumnStatistics()[filterColumns[pred]];
-          Object minValue = RecordReaderImpl.getMin(stats);
-          Object maxValue = RecordReaderImpl.getMax(stats);
-          truthValues[pred] =
-              RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
-                  minValue, maxValue);
+          ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+          truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
         } else {
 
           // parition column case.

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jan 10 00:09:14 2015
@@ -2356,21 +2356,36 @@ class RecordReaderImpl implements Record
   /**
    * Evaluate a predicate with respect to the statistics from the column
    * that is referenced in the predicate.
-   * @param index the statistics for the column mentioned in the predicate
+   * @param statsProto the statistics for the column mentioned in the predicate
    * @param predicate the leaf predicate we need to evaluation
    * @return the set of truth values that may be returned for the given
    *   predicate.
    */
-  static TruthValue evaluatePredicate(OrcProto.ColumnStatistics index,
+  static TruthValue evaluatePredicate(OrcProto.ColumnStatistics statsProto,
                                       PredicateLeaf predicate) {
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(index);
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
     Object minValue = getMin(cs);
     Object maxValue = getMax(cs);
-    return evaluatePredicateRange(predicate, minValue, maxValue);
+    return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull());
+  }
+
+  /**
+   * Evaluate a predicate with respect to the statistics from the column
+   * that is referenced in the predicate.
+   * @param stats the statistics for the column mentioned in the predicate
+   * @param predicate the leaf predicate we need to evaluate
+   * @return the set of truth values that may be returned for the given
+   *   predicate.
+   */
+  static TruthValue evaluatePredicate(ColumnStatistics stats,
+      PredicateLeaf predicate) {
+    Object minValue = getMin(stats);
+    Object maxValue = getMax(stats);
+    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull());
   }
 
   static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
-      Object max) {
+      Object max, boolean hasNull) {
     // if we didn't have any values, everything must have been null
     if (min == null) {
       if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
@@ -2405,29 +2420,29 @@ class RecordReaderImpl implements Record
       case EQUALS:
         loc = compareToRange((Comparable) predObj, minValue, maxValue);
         if (minValue.equals(maxValue) && loc == Location.MIN) {
-          return TruthValue.YES_NULL;
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE || loc == Location.AFTER) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case LESS_THAN:
         loc = compareToRange((Comparable) predObj, minValue, maxValue);
         if (loc == Location.AFTER) {
-          return TruthValue.YES_NULL;
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE || loc == Location.MIN) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case LESS_THAN_EQUALS:
         loc = compareToRange((Comparable) predObj, minValue, maxValue);
         if (loc == Location.AFTER || loc == Location.MAX) {
-          return TruthValue.YES_NULL;
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case IN:
         if (minValue.equals(maxValue)) {
@@ -2437,10 +2452,10 @@ class RecordReaderImpl implements Record
             predObj = getBaseObjectForComparison(arg, minValue);
             loc = compareToRange((Comparable) predObj, minValue, maxValue);
             if (loc == Location.MIN) {
-              return TruthValue.YES_NULL;
+              return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
             }
           }
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
           // are all of the values outside of the range?
           for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
@@ -2448,10 +2463,10 @@ class RecordReaderImpl implements Record
             loc = compareToRange((Comparable) predObj, minValue, maxValue);
             if (loc == Location.MIN || loc == Location.MIDDLE ||
                 loc == Location.MAX) {
-              return TruthValue.YES_NO_NULL;
+              return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
             }
           }
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         }
       case BETWEEN:
         List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
@@ -2463,26 +2478,26 @@ class RecordReaderImpl implements Record
 
           Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
           if (loc2 == Location.AFTER || loc2 == Location.MAX) {
-            return TruthValue.YES_NULL;
+            return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
           } else if (loc2 == Location.BEFORE) {
-            return TruthValue.NO_NULL;
+            return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
           } else {
-            return TruthValue.YES_NO_NULL;
+            return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
           }
         } else if (loc == Location.AFTER) {
-          return TruthValue.NO_NULL;
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
         } else {
-          return TruthValue.YES_NO_NULL;
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case IS_NULL:
-        return TruthValue.YES_NO;
+        return hasNull ? TruthValue.YES : TruthValue.NO;
       default:
-        return TruthValue.YES_NO_NULL;
+        return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
       }
 
       // in case failed conversion, return the default YES_NO_NULL truth value
     } catch (NumberFormatException nfe) {
-      return TruthValue.YES_NO_NULL;
+      return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
     }
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Jan 10 00:09:14 2015
@@ -656,6 +656,8 @@ class WriterImpl implements Writer, Memo
     void write(Object obj) throws IOException {
       if (obj != null) {
         indexStatistics.increment();
+      } else {
+        indexStatistics.setNull();
       }
       if (isPresent != null) {
         isPresent.write(obj == null ? 0 : 1);

Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Sat Jan 10 00:09:14 2015
@@ -74,6 +74,7 @@ message ColumnStatistics {
   optional DateStatistics dateStatistics = 7;
   optional BinaryStatistics binaryStatistics = 8;
   optional TimestampStatistics timestampStatistics = 9;
+  optional bool hasNull = 10;
 }
 
 message RowIndexEntry {

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java Sat Jan 10 00:09:14 2015
@@ -18,16 +18,28 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import static junit.framework.Assert.assertEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TestName;
 
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
 import java.sql.Timestamp;
-
-import static junit.framework.Assert.assertEquals;
+import java.util.List;
 
 /**
  * Test ColumnStatisticsImpl for ORC.
@@ -173,4 +185,159 @@ public class TestColumnStatistics {
     assertEquals(-10, typed.getMinimum().longValue());
     assertEquals(10000, typed.getMaximum().longValue());
   }
+
+
+  public static class SimpleStruct {
+    BytesWritable bytes1;
+    Text string1;
+
+    SimpleStruct(BytesWritable b1, String s1) {
+      this.bytes1 = b1;
+      if (s1 == null) {
+        this.string1 = null;
+      } else {
+        this.string1 = new Text(s1);
+      }
+    }
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("TestOrcFile." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  private static BytesWritable bytes(int... items) {
+    BytesWritable result = new BytesWritable();
+    result.setSize(items.length);
+    for (int i = 0; i < items.length; ++i) {
+      result.getBytes()[i] = (byte) items[i];
+    }
+    return result;
+  }
+
+  @Test
+  public void testHasNull() throws Exception {
+
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .inspector(inspector)
+            .rowIndexStride(1000)
+            .stripeSize(10000)
+            .bufferSize(10000));
+    // STRIPE 1
+    // RG1
+    for(int i=0; i<1000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), "RG1"));
+    }
+    // RG2
+    for(int i=0; i<1000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+    }
+    // RG3
+    for(int i=0; i<1000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), "RG3"));
+    }
+    // RG4
+    for(int i=0; i<1000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+    }
+    // RG5
+    for(int i=0; i<1000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+    }
+    // STRIPE 2
+    for(int i=0; i<5000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+    }
+    // STRIPE 3
+    for(int i=0; i<5000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), "STRIPE-3"));
+    }
+    // STRIPE 4
+    for(int i=0; i<5000; i++) {
+      writer.addRow(new SimpleStruct(bytes(1,2,3), null));
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    // check the file level stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(20000, stats[0].getNumberOfValues());
+    assertEquals(20000, stats[1].getNumberOfValues());
+    assertEquals(7000, stats[2].getNumberOfValues());
+    assertEquals(false, stats[0].hasNull());
+    assertEquals(false, stats[1].hasNull());
+    assertEquals(true, stats[2].hasNull());
+
+    // check the stripe level stats
+    List<StripeStatistics> stripeStats = reader.getMetadata().getStripeStatistics();
+    // stripe 1 stats
+    StripeStatistics ss1 = stripeStats.get(0);
+    ColumnStatistics ss1_cs1 = ss1.getColumnStatistics()[0];
+    ColumnStatistics ss1_cs2 = ss1.getColumnStatistics()[1];
+    ColumnStatistics ss1_cs3 = ss1.getColumnStatistics()[2];
+    assertEquals(false, ss1_cs1.hasNull());
+    assertEquals(false, ss1_cs2.hasNull());
+    assertEquals(true, ss1_cs3.hasNull());
+
+    // stripe 2 stats
+    StripeStatistics ss2 = stripeStats.get(1);
+    ColumnStatistics ss2_cs1 = ss2.getColumnStatistics()[0];
+    ColumnStatistics ss2_cs2 = ss2.getColumnStatistics()[1];
+    ColumnStatistics ss2_cs3 = ss2.getColumnStatistics()[2];
+    assertEquals(false, ss2_cs1.hasNull());
+    assertEquals(false, ss2_cs2.hasNull());
+    assertEquals(true, ss2_cs3.hasNull());
+
+    // stripe 3 stats
+    StripeStatistics ss3 = stripeStats.get(2);
+    ColumnStatistics ss3_cs1 = ss3.getColumnStatistics()[0];
+    ColumnStatistics ss3_cs2 = ss3.getColumnStatistics()[1];
+    ColumnStatistics ss3_cs3 = ss3.getColumnStatistics()[2];
+    assertEquals(false, ss3_cs1.hasNull());
+    assertEquals(false, ss3_cs2.hasNull());
+    assertEquals(false, ss3_cs3.hasNull());
+
+    // stripe 4 stats
+    StripeStatistics ss4 = stripeStats.get(3);
+    ColumnStatistics ss4_cs1 = ss4.getColumnStatistics()[0];
+    ColumnStatistics ss4_cs2 = ss4.getColumnStatistics()[1];
+    ColumnStatistics ss4_cs3 = ss4.getColumnStatistics()[2];
+    assertEquals(false, ss4_cs1.hasNull());
+    assertEquals(false, ss4_cs2.hasNull());
+    assertEquals(true, ss4_cs3.hasNull());
+
+    // Test file dump
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-has-null.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
 }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java Sat Jan 10 00:09:14 2015
@@ -21,6 +21,19 @@ package org.apache.hadoop.hive.ql.io.orc
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
@@ -36,21 +49,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hive.common.util.HiveTestUtils;
-import org.fusesource.leveldbjni.All;
-import org.junit.Before;
-import org.junit.Test;
-
 public class TestFileDump {
 
   Path workDir = new Path(System.getProperty("test.tmp.dir"));
@@ -127,7 +125,7 @@ public class TestFileDump {
     }
   }
 
-  private static void checkOutput(String expected,
+  static void checkOutput(String expected,
                                   String actual) throws Exception {
     BufferedReader eStream =
         new BufferedReader(new FileReader(HiveTestUtils.getFileFromClasspath(expected)));

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Sat Jan 10 00:09:14 2015
@@ -21,6 +21,27 @@ import static org.junit.Assert.assertArr
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.TreeSet;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -83,27 +104,6 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.TimeZone;
-import java.util.TreeSet;
-
 public class TestInputOutputFormat {
 
   Path workDir = new Path(System.getProperty("test.tmp.dir","target/tmp"));
@@ -1638,14 +1638,14 @@ public class TestInputOutputFormat {
     assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000",
         split.getPath().toString());
     assertEquals(0, split.getStart());
-    assertEquals(582, split.getLength());
+    assertEquals(607, split.getLength());
     split = (HiveInputFormat.HiveInputSplit) splits[1];
     assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
         split.inputFormatClassName());
     assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001",
         split.getPath().toString());
     assertEquals(0, split.getStart());
-    assertEquals(603, split.getLength());
+    assertEquals(629, split.getLength());
     CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
         (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
     assertEquals(BUCKETS, combineSplit.getNumPaths());
@@ -1653,7 +1653,7 @@ public class TestInputOutputFormat {
       assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
           combineSplit.getPath(bucket).toString());
       assertEquals(0, combineSplit.getOffset(bucket));
-      assertEquals(227, combineSplit.getLength(bucket));
+      assertEquals(241, combineSplit.getLength(bucket));
     }
     String[] hosts = combineSplit.getLocations();
     assertEquals(2, hosts.length);

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Sat Jan 10 00:09:14 2015
@@ -256,13 +256,13 @@ public class TestOrcFile {
     assertEquals(7500, stats[1].getNumberOfValues());
     assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 7500 true: 3750", stats[1].toString());
+    assertEquals("count: 7500 hasNull: false true: 3750", stats[1].toString());
 
     assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
     assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
     assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000",
+    assertEquals("count: 7500 hasNull: false min: 1024 max: 2048 sum: 11520000",
         stats[3].toString());
 
     assertEquals(Long.MAX_VALUE,
@@ -271,17 +271,17 @@ public class TestOrcFile {
         ((IntegerColumnStatistics) stats[5]).getMinimum());
     assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
     assertEquals(
-        "count: 7500 min: 9223372036854775807 max: 9223372036854775807",
+        "count: 7500 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
         stats[5].toString());
 
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
     assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
         0.00001);
-    assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
+    assertEquals("count: 7500 hasNull: false min: -15.0 max: -5.0 sum: -75000.0",
         stats[7].toString());
 
-    assertEquals("count: 7500 min: bye max: hi sum: 0", stats[9].toString());
+    assertEquals("count: 7500 hasNull: false min: bye max: hi sum: 0", stats[9].toString());
 
     // check the inspectors
     StructObjectInspector readerInspector = (StructObjectInspector) reader
@@ -541,17 +541,17 @@ public class TestOrcFile {
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
     assertEquals(4, stats[0].getNumberOfValues());
-    assertEquals("count: 4", stats[0].toString());
+    assertEquals("count: 4 hasNull: false", stats[0].toString());
 
     assertEquals(3, stats[1].getNumberOfValues());
     assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 3 sum: 15", stats[1].toString());
+    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
 
     assertEquals(3, stats[2].getNumberOfValues());
     assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
-    assertEquals("count: 3 min: bar max: hi sum: 8",
+    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
         stats[2].toString());
 
     // check the inspectors
@@ -722,13 +722,13 @@ public class TestOrcFile {
     assertEquals(2, stats[1].getNumberOfValues());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 2 true: 1", stats[1].toString());
+    assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
 
     assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
     assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
     assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+    assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
         stats[3].toString());
 
     StripeStatistics ss = metadata.getStripeStatistics().get(0);
@@ -740,10 +740,10 @@ public class TestOrcFile {
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
     assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
-    assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+    assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
         stats[7].toString());
 
-    assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+    assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
 
     // check the inspectors
     StructObjectInspector readerInspector =

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java Sat Jan 10 00:09:14 2015
@@ -119,13 +119,13 @@ public class TestOrcNullOptimization {
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 19998 min: 0 max: 0 sum: 0",
+    assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0",
         stats[1].toString());
 
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals(19998, stats[2].getNumberOfValues());
-    assertEquals("count: 19998 min: a max: a sum: 19998",
+    assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998",
         stats[2].toString());
 
     // check the inspectors
@@ -223,13 +223,13 @@ public class TestOrcNullOptimization {
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 20000 min: 0 max: 0 sum: 0",
+    assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0",
         stats[1].toString());
 
     assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals(20000, stats[2].getNumberOfValues());
-    assertEquals("count: 20000 min: a max: b sum: 20000",
+    assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000",
         stats[2].toString());
 
     // check the inspectors
@@ -324,13 +324,13 @@ public class TestOrcNullOptimization {
     assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
     assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 7 min: 2 max: 3 sum: 17",
+    assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17",
         stats[1].toString());
 
     assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals(7, stats[2].getNumberOfValues());
-    assertEquals("count: 7 min: a max: h sum: 7",
+    assertEquals("count: 7 hasNull: true min: a max: h sum: 7",
         stats[2].toString());
 
     // check the inspectors

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java Sat Jan 10 00:09:14 2015
@@ -220,17 +220,17 @@ public class TestOrcSerDeStats {
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
     assertEquals(4, stats[0].getNumberOfValues());
-    assertEquals("count: 4", stats[0].toString());
+    assertEquals("count: 4 hasNull: false", stats[0].toString());
 
     assertEquals(3, stats[1].getNumberOfValues());
     assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 3 sum: 15", stats[1].toString());
+    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
 
     assertEquals(3, stats[2].getNumberOfValues());
     assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
-    assertEquals("count: 3 min: bar max: hi sum: 8",
+    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
         stats[2].toString());
 
     // check the inspectors
@@ -448,13 +448,13 @@ public class TestOrcSerDeStats {
     assertEquals(2, stats[1].getNumberOfValues());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 2 true: 1", stats[1].toString());
+    assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
 
     assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
     assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
     assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+    assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
         stats[3].toString());
 
     assertEquals(Long.MAX_VALUE,
@@ -462,16 +462,16 @@ public class TestOrcSerDeStats {
     assertEquals(Long.MAX_VALUE,
         ((IntegerColumnStatistics) stats[5]).getMinimum());
     assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
-    assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+    assertEquals("count: 2 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
         stats[5].toString());
 
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
     assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
-    assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+    assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
         stats[7].toString());
 
-    assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+    assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
   }
 
   @Test
@@ -541,13 +541,13 @@ public class TestOrcSerDeStats {
     assertEquals(2, stats[1].getNumberOfValues());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 2 true: 1", stats[1].toString());
+    assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
 
     assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
     assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
     assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+    assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
         stats[3].toString());
 
     assertEquals(Long.MAX_VALUE,
@@ -555,22 +555,22 @@ public class TestOrcSerDeStats {
     assertEquals(Long.MAX_VALUE,
         ((IntegerColumnStatistics) stats[5]).getMinimum());
     assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
-    assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+    assertEquals("count: 2 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
         stats[5].toString());
 
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
     assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
-    assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+    assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
         stats[7].toString());
 
     assertEquals(5, ((BinaryColumnStatistics) stats[8]).getSum());
-    assertEquals("count: 2 sum: 5", stats[8].toString());
+    assertEquals("count: 2 hasNull: false sum: 5", stats[8].toString());
 
     assertEquals("bye", ((StringColumnStatistics) stats[9]).getMinimum());
     assertEquals("hi", ((StringColumnStatistics) stats[9]).getMaximum());
     assertEquals(5, ((StringColumnStatistics) stats[9]).getSum());
-    assertEquals("count: 2 min: bye max: hi sum: 5", stats[9].toString());
+    assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
   }
 
   @Test(expected = ClassCastException.class)
@@ -603,13 +603,13 @@ public class TestOrcSerDeStats {
     assertEquals(7500, stats[1].getNumberOfValues());
     assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 7500 true: 3750", stats[1].toString());
+    assertEquals("count: 7500 hasNull: false true: 3750", stats[1].toString());
 
     assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
     assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
     assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000",
+    assertEquals("count: 7500 hasNull: false min: 1024 max: 2048 sum: 11520000",
         stats[3].toString());
 
     assertEquals(Long.MAX_VALUE,
@@ -618,24 +618,24 @@ public class TestOrcSerDeStats {
         ((IntegerColumnStatistics) stats[5]).getMinimum());
     assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
     assertEquals(
-        "count: 7500 min: 9223372036854775807 max: 9223372036854775807",
+        "count: 7500 hasNull: false min: 9223372036854775807 max: 9223372036854775807",
         stats[5].toString());
 
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
     assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
         0.00001);
-    assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
+    assertEquals("count: 7500 hasNull: false min: -15.0 max: -5.0 sum: -75000.0",
         stats[7].toString());
 
     assertEquals("bye", ((StringColumnStatistics) stats[9]).getMinimum());
     assertEquals("hi", ((StringColumnStatistics) stats[9]).getMaximum());
     assertEquals(0, ((StringColumnStatistics) stats[9]).getSum());
-    assertEquals("count: 7500 min: bye max: hi sum: 0", stats[9].toString());
+    assertEquals("count: 7500 hasNull: false min: bye max: hi sum: 0", stats[9].toString());
 
     // old orc format will not have binary statistics. toString() will show only
     // the general column statistics
-    assertEquals("count: 7500", stats[8].toString());
+    assertEquals("count: 7500 hasNull: false", stats[8].toString());
     // since old orc format doesn't support binary statistics,
     // this should throw ClassCastException
     assertEquals(5, ((BinaryColumnStatistics) stats[8]).getSum());

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java?rev=1650699&r1=1650698&r2=1650699&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java Sat Jan 10 00:09:14 2015
@@ -23,11 +23,6 @@ import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -46,6 +41,11 @@ import org.junit.Test;
 import org.mockito.MockSettings;
 import org.mockito.Mockito;
 
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
 public class TestRecordReaderImpl {
 
   // can add .verboseLogging() to cause Mockito to log invocations
@@ -264,6 +264,15 @@ public class TestRecordReaderImpl {
     return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build();
   }
 
+  private static OrcProto.ColumnStatistics createStringStats(String min, String max,
+      boolean hasNull) {
+    OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
+    strStats.setMinimum(min);
+    strStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build())
+        .setHasNull(hasNull).build();
+  }
+
   private static OrcProto.ColumnStatistics createStringStats(String min, String max) {
     OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
     strStats.setMinimum(min);
@@ -483,17 +492,17 @@ public class TestRecordReaderImpl {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
         (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
             "x", 15L, null);
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(15L, 15L), pred));
   }
 
@@ -521,15 +530,15 @@ public class TestRecordReaderImpl {
     PredicateLeaf lessThan = TestSearchArgumentImpl.createPredicateLeaf
         (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
             "x", 15L, null);
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), lessThan));
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), lessThan));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), lessThan));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), lessThan));
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), lessThan));
   }
 
@@ -538,15 +547,15 @@ public class TestRecordReaderImpl {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
         (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
             "x", 15L, null);
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(15L, 30L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 15L), pred));
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(0L, 10L), pred));
   }
 
@@ -558,13 +567,13 @@ public class TestRecordReaderImpl {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
         (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
             "x", null, args);
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(20L, 20L), pred));
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(30L, 30L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 30L), pred));
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
   }
 
@@ -576,19 +585,19 @@ public class TestRecordReaderImpl {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
         (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
             "x", null, args);
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(0L, 5L), pred));
-    assertEquals(TruthValue.NO_NULL,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(30L, 40L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(5L, 15L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(15L, 25L), pred));
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(5L, 25L), pred));
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(10L, 20L), pred));
-    assertEquals(TruthValue.YES_NULL,
+    assertEquals(TruthValue.YES,
         RecordReaderImpl.evaluatePredicate(createIntStats(12L, 18L), pred));
   }
 
@@ -597,10 +606,156 @@ public class TestRecordReaderImpl {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
         (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
             "x", null, null);
-    assertEquals(TruthValue.YES_NO,
+    assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicate(createIntStats(20L, 30L), pred));
   }
 
+
+  @Test
+  public void testEqualsWithNullInStats() throws Exception {
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+  }
+
+  @Test
+  public void testNullSafeEqualsWithNullInStats() throws Exception {
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+  }
+
+  @Test
+  public void testLessThanWithNullInStats() throws Exception {
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+    assertEquals(TruthValue.NO_NULL, // min, same stats
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+  }
+
+  @Test
+  public void testLessThanEqualsWithNullInStats() throws Exception {
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred)); // before
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "c", true), pred)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+  }
+
+  @Test
+  public void testInWithNullInStats() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add("c");
+    args.add("f");
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
+            "x", null, args);
+    assertEquals(TruthValue.NO_NULL, // before & after
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred)); // same
+  }
+
+  @Test
+  public void testBetweenWithNullInStats() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add("c");
+    args.add("f");
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
+            "x", null, args);
+    assertEquals(TruthValue.YES_NULL, // before & after
+        RecordReaderImpl.evaluatePredicate(createStringStats("d", "e", true), pred));
+    assertEquals(TruthValue.YES_NULL, // before & max
+        RecordReaderImpl.evaluatePredicate(createStringStats("e", "f", true), pred));
+    assertEquals(TruthValue.NO_NULL, // before & before
+        RecordReaderImpl.evaluatePredicate(createStringStats("h", "g", true), pred));
+    assertEquals(TruthValue.YES_NO_NULL, // before & min
+        RecordReaderImpl.evaluatePredicate(createStringStats("f", "g", true), pred));
+    assertEquals(TruthValue.YES_NO_NULL, // before & middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("e", "g", true), pred));
+
+    assertEquals(TruthValue.YES_NULL, // min & after
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "e", true), pred));
+    assertEquals(TruthValue.YES_NULL, // min & max
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "f", true), pred));
+    assertEquals(TruthValue.YES_NO_NULL, // min & middle
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "g", true), pred));
+
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "b", true), pred)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("a", "c", true), pred)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicate(createStringStats("b", "d", true), pred)); // middle
+    assertEquals(TruthValue.YES_NULL, // min & after, same stats
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "c", true), pred));
+  }
+
+  @Test
+  public void testIsNullWithNullInStats() throws Exception {
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
+        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
+            "x", null, null);
+    assertEquals(TruthValue.YES,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", true), pred));
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicate(createStringStats("c", "d", false), pred));
+  }
+
   @Test
   public void testOverlap() throws Exception {
     assertTrue(!RecordReaderImpl.overlap(0, 10, -10, -1));



Mime
View raw message