From: gunt...@apache.org
Subject: svn commit: r1570426 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/
Date: Fri, 21 Feb 2014 02:46:20 GMT
Author: gunther
Date: Fri Feb 21 02:46:19 2014
New Revision: 1570426

URL: http://svn.apache.org/r1570426
Log:
HIVE-6382: PATCHED_BLOB encoding in ORC will corrupt data in some cases (Patch by Prasanth J, reviewed by Sergey Shelukhin)
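
Background on the fix (illustration, not part of the commit): in the PATCHED_BASE
run of the V2 integer encoding, each entry of the patch blob packs a gap and a
patch value into a single 64-bit long. The reader masked the patch out with
"(1 << pw) - 1", a 32-bit shift that silently wraps for patch widths of 32 bits
or more, and a required patch width of 64 bits left no room in the long for the
gap field at all. A standalone sketch of the mask overflow (class name hypothetical):

    // Java's "1 << pw" is a 32-bit shift whose distance is taken mod 32,
    // so for pw >= 32 the mask wraps around instead of covering pw low bits.
    public class PatchMaskDemo {
      public static void main(String[] args) {
        int pw = 32;                            // patch width in bits
        long badMask  = (1 << pw) - 1;          // 1 << 32 == 1, so badMask == 0
        long goodMask = (1L << pw) - 1;         // 0xFFFFFFFFL, as intended
        long entry = (3L << pw) | 0xDEADBEEFL;  // gap = 3 above a 32-bit patch
        System.out.println(entry & badMask);    // 0 -- the patch is lost
        System.out.println(entry & goodMask);   // 3735928559 (0xDEADBEEF)
      }
    }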

Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Feb 21 02:46:19 2014
@@ -533,6 +533,7 @@ public class HiveConf extends Configurat
     HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false),
     HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000),
     HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10),
+    HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false),
 
     HIVESKEWJOIN("hive.optimize.skewjoin", false),
     HIVECONVERTJOIN("hive.auto.convert.join", true),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Fri Feb 21 02:46:19 2014
@@ -1933,6 +1933,14 @@
 </property>
 
 <property>
+  <name>hive.exec.orc.skip.corrupt.data</name>
+  <value>false</value>
+  <description>If ORC reader encounters corrupt data, this value will be used to determine
+  whether to skip the corrupt data or throw exception. The default behavior is to throw exception.
+  </description>
+</property>
+
+<property>
   <name>hive.multi.insert.move.tasks.share.dependencies</name>
   <value>false</value>
   <description>
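
For reference, an illustrative client-side sketch (not part of this commit; class
name hypothetical) of how the new flag and the Configuration-taking createReader
overload fit together, mirroring the FileDump change below:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;

    public class SkipCorruptExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Opt in to skipping corrupt data instead of failing with error 30018.
        HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA, true);
        Path path = new Path(args[0]);
        FileSystem fs = path.getFileSystem(conf);
        // The conf is now threaded from the reader down to RunLengthIntegerReaderV2.
        Reader reader = OrcFile.createReader(fs, path, conf);
        System.out.println("Rows: " + reader.getNumberOfRows());
      }
    }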

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Fri Feb 21 02:46:19 2014
@@ -25,6 +25,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.antlr.runtime.tree.Tree;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin;
@@ -432,7 +433,9 @@ public enum ErrorMsg {
   STATSAGGREGATOR_MISSED_SOMESTATS(30016,
       "Stats type {0} is missing from stats aggregator. If you don't want the query " +
       "to fail because of this, set hive.stats.atomic=false", true),
-  STATS_SKIPPING_BY_ERROR(30017, "Skipping stats aggregation by error {0}", true);
+  STATS_SKIPPING_BY_ERROR(30017, "Skipping stats aggregation by error {0}", true),
+  ORC_CORRUPTED_READ(30018, "Corruption in ORC data encountered. To skip reading corrupted "
+      + "data, set " + HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA + " to true");
   ;
 
   private int errorCode;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Fri Feb 21 02:46:19 2014
@@ -33,7 +33,7 @@ public final class FileDump {
     for(String filename: args) {
       System.out.println("Structure for " + filename);
       Path path = new Path(filename);
-      Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
+      Reader reader = OrcFile.createReader(path.getFileSystem(conf), path, conf);
       RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
       System.out.println("Rows: " + reader.getNumberOfRows());
       System.out.println("Compression: " + reader.getCompression());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Fri Feb 21 02:46:19 2014
@@ -114,14 +114,15 @@ public final class OrcFile {
    * @return a new ORC file reader.
    * @throws IOException
    */
-  public static Reader createReader(FileSystem fs, Path path
-                                    ) throws IOException {
-    return new ReaderImpl(fs, path);
+  public static Reader createReader(FileSystem fs, Path path,
+                                    Configuration conf) throws IOException {
+    return new ReaderImpl(fs, path, conf);
   }
 
-  public static Reader createReader(FileSystem fs, Path path, FileMetaInfo fileMetaInfo)
+  public static Reader createReader(FileSystem fs, Path path,
+      FileMetaInfo fileMetaInfo, Configuration conf)
       throws IOException {
-    return new ReaderImpl(fs, path, fileMetaInfo);
+    return new ReaderImpl(fs, path, fileMetaInfo, conf);
   }
 
   /**

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Fri Feb 21 02:46:19 2014
@@ -270,16 +270,16 @@ public class OrcInputFormat  implements 
 
     if(!(fSplit instanceof OrcSplit)){
       //If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
-      reader = OrcFile.createReader(fs, path);
+      reader = OrcFile.createReader(fs, path, conf);
     } else {
       //We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
       //constructor
       OrcSplit orcSplit = (OrcSplit) fSplit;
       if (orcSplit.hasFooter()) {
         FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
-        reader = OrcFile.createReader(fs, path, fMetaInfo);
+        reader = OrcFile.createReader(fs, path, fMetaInfo, conf);
       } else {
-        reader = OrcFile.createReader(fs, path);
+        reader = OrcFile.createReader(fs, path, conf);
       }
     }
     return new OrcRecordReader(reader, conf, fSplit.getStart(), fSplit.getLength());
@@ -299,7 +299,7 @@ public class OrcInputFormat  implements 
     }
     for (FileStatus file : files) {
       try {
-        OrcFile.createReader(fs, file.getPath());
+        OrcFile.createReader(fs, file.getPath(), conf);
       } catch (IOException e) {
         return false;
       }
@@ -797,14 +797,14 @@ public class OrcInputFormat  implements 
           types = fileInfo.types;
           // For multiple runs, in case sendSplitsInFooter changes
           if (fileMetaInfo == null && context.footerInSplits) {
-            orcReader = OrcFile.createReader(fs, file.getPath());
+            orcReader = OrcFile.createReader(fs, file.getPath(), context.conf);
             fileInfo.fileMetaInfo = orcReader.getFileMetaInfo();
             fileInfo.metadata = orcReader.getMetadata();
             fileInfo.types = orcReader.getTypes();
           }
         }
         if (!found) {
-          orcReader = OrcFile.createReader(fs, file.getPath());
+          orcReader = OrcFile.createReader(fs, file.getPath(), context.conf);
           stripes = orcReader.getStripes();
           metadata = orcReader.getMetadata();
           types = orcReader.getTypes();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java Fri Feb 21 02:46:19 2014
@@ -46,9 +46,10 @@ public class OrcNewInputFormat extends I
       throws IOException, InterruptedException {
     FileSplit fileSplit = (FileSplit) inputSplit;
     Path path = fileSplit.getPath();
-    FileSystem fs = path.getFileSystem(ShimLoader.getHadoopShims()
-        .getConfiguration(context));
-    return new OrcRecordReader(OrcFile.createReader(fs, path),
+    Configuration conf = ShimLoader.getHadoopShims()
+        .getConfiguration(context);
+    FileSystem fs = path.getFileSystem(conf);
+    return new OrcRecordReader(OrcFile.createReader(fs, path, conf),
         ShimLoader.getHadoopShims().getConfiguration(context),
         fileSplit.getStart(), fileSplit.getLength());
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Fri Feb 21 02:46:19 2014
@@ -30,6 +30,7 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -60,6 +61,7 @@ final class ReaderImpl implements Reader
   private final OrcProto.Footer footer;
   private final ObjectInspector inspector;
   private long deserializedSize = -1;
+  private final Configuration conf;
 
   //serialized footer - Keeping this around for use by getFileMetaInfo()
   // will help avoid cpu cycles spend in deserializing at cost of increased
@@ -288,11 +290,13 @@ final class ReaderImpl implements Reader
    * Constructor that extracts metadata information from file footer
    * @param fs
    * @param path
+   * @param conf
    * @throws IOException
    */
-  ReaderImpl(FileSystem fs, Path path) throws IOException {
+  ReaderImpl(FileSystem fs, Path path, Configuration conf) throws IOException {
     this.fileSystem = fs;
     this.path = path;
+    this.conf = conf;
 
     FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fs, path);
 
@@ -316,12 +320,14 @@ final class ReaderImpl implements Reader
    * @param fs
    * @param path
    * @param fMetaInfo
+   * @param conf
    * @throws IOException
    */
-  ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo)
+  ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo, Configuration conf)
       throws IOException {
     this.fileSystem = fs;
     this.path = path;
+    this.conf = conf;
 
     MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(
             fMetaInfo.compressionType,
@@ -487,7 +493,7 @@ final class ReaderImpl implements Reader
 
     return new RecordReaderImpl(this.getStripes(), fileSystem,  path, offset,
         length, footer.getTypesList(), codec, bufferSize,
-        include, footer.getRowIndexStride(), sarg, columnNames);
+        include, footer.getRowIndexStride(), sarg, columnNames, conf);
   }
 
   @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Fri Feb 21 02:46:19 2014
@@ -31,6 +31,7 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -84,6 +85,7 @@ class RecordReaderImpl implements Record
   private final int[] filterColumns;
   // an array about which row groups aren't skipped
   private boolean[] includedRowGroups = null;
+  private final Configuration conf;
 
   RecordReaderImpl(Iterable<StripeInformation> stripes,
                    FileSystem fileSystem,
@@ -95,13 +97,15 @@ class RecordReaderImpl implements Record
                    boolean[] included,
                    long strideRate,
                    SearchArgument sarg,
-                   String[] columnNames
+                   String[] columnNames,
+                   Configuration conf
                   ) throws IOException {
     this.file = fileSystem.open(path);
     this.codec = codec;
     this.types = types;
     this.bufferSize = bufferSize;
     this.included = included;
+    this.conf = conf;
     this.sarg = sarg;
     if (sarg != null) {
       sargLeaves = sarg.getLeaves();
@@ -128,7 +132,7 @@ class RecordReaderImpl implements Record
 
     firstRow = skippedRows;
     totalRowCount = rows;
-    reader = createTreeReader(path, 0, types, included);
+    reader = createTreeReader(path, 0, types, included, conf);
     indexes = new OrcProto.RowIndex[types.size()];
     rowIndexStride = strideRate;
     advanceToNextRow(0L);
@@ -163,10 +167,12 @@ class RecordReaderImpl implements Record
     protected final int columnId;
     private BitFieldReader present = null;
     protected boolean valuePresent = false;
+    protected final Configuration conf;
 
-    TreeReader(Path path, int columnId) {
+    TreeReader(Path path, int columnId, Configuration conf) {
       this.path = path;
       this.columnId = columnId;
+      this.conf = conf;
     }
 
     void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
@@ -182,7 +188,7 @@ class RecordReaderImpl implements Record
       switch (kind) {
       case DIRECT_V2:
       case DICTIONARY_V2:
-        return new RunLengthIntegerReaderV2(in, signed);
+        return new RunLengthIntegerReaderV2(in, signed, conf);
       case DIRECT:
       case DICTIONARY:
         return new RunLengthIntegerReader(in, signed);
@@ -275,8 +281,8 @@ class RecordReaderImpl implements Record
   private static class BooleanTreeReader extends TreeReader{
     private BitFieldReader reader = null;
 
-    BooleanTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    BooleanTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -335,8 +341,8 @@ class RecordReaderImpl implements Record
   private static class ByteTreeReader extends TreeReader{
     private RunLengthByteReader reader = null;
 
-    ByteTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    ByteTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -395,8 +401,8 @@ class RecordReaderImpl implements Record
   private static class ShortTreeReader extends TreeReader{
     private IntegerReader reader = null;
 
-    ShortTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    ShortTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -465,8 +471,8 @@ class RecordReaderImpl implements Record
   private static class IntTreeReader extends TreeReader{
     private IntegerReader reader = null;
 
-    IntTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    IntTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -535,8 +541,8 @@ class RecordReaderImpl implements Record
   private static class LongTreeReader extends TreeReader{
     private IntegerReader reader = null;
 
-    LongTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    LongTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -605,8 +611,8 @@ class RecordReaderImpl implements Record
   private static class FloatTreeReader extends TreeReader{
     private InStream stream;
 
-    FloatTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    FloatTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -685,8 +691,8 @@ class RecordReaderImpl implements Record
   private static class DoubleTreeReader extends TreeReader{
     private InStream stream;
 
-    DoubleTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    DoubleTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -764,8 +770,8 @@ class RecordReaderImpl implements Record
     private InStream stream;
     private IntegerReader lengths = null;
 
-    BinaryTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    BinaryTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -843,8 +849,8 @@ class RecordReaderImpl implements Record
     private IntegerReader nanos = null;
     private final LongColumnVector nanoVector = new LongColumnVector();
 
-    TimestampTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    TimestampTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -970,8 +976,8 @@ class RecordReaderImpl implements Record
   private static class DateTreeReader extends TreeReader{
     private IntegerReader reader = null;
 
-    DateTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    DateTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -1045,8 +1051,8 @@ class RecordReaderImpl implements Record
     private final int precision;
     private final int scale;
 
-    DecimalTreeReader(Path path, int columnId, int precision, int scale) {
-      super(path, columnId);
+    DecimalTreeReader(Path path, int columnId, int precision, int scale, Configuration conf) {
+      super(path, columnId, conf);
       this.precision = precision;
       this.scale = scale;
     }
@@ -1155,8 +1161,8 @@ class RecordReaderImpl implements Record
   private static class StringTreeReader extends TreeReader {
     private TreeReader reader;
 
-    StringTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    StringTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
     }
 
     @Override
@@ -1173,11 +1179,11 @@ class RecordReaderImpl implements Record
       switch (encodings.get(columnId).getKind()) {
         case DIRECT:
         case DIRECT_V2:
-          reader = new StringDirectTreeReader(path, columnId);
+          reader = new StringDirectTreeReader(path, columnId, conf);
           break;
         case DICTIONARY:
         case DICTIONARY_V2:
-          reader = new StringDictionaryTreeReader(path, columnId);
+          reader = new StringDictionaryTreeReader(path, columnId, conf);
           break;
         default:
           throw new IllegalArgumentException("Unsupported encoding " +
@@ -1217,8 +1223,8 @@ class RecordReaderImpl implements Record
 
     private final LongColumnVector scratchlcv;
 
-    StringDirectTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    StringDirectTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
       scratchlcv = new LongColumnVector();
     }
 
@@ -1366,8 +1372,8 @@ class RecordReaderImpl implements Record
     private byte[] dictionaryBufferInBytesCache = null;
     private final LongColumnVector scratchlcv;
 
-    StringDictionaryTreeReader(Path path, int columnId) {
-      super(path, columnId);
+    StringDictionaryTreeReader(Path path, int columnId, Configuration conf) {
+      super(path, columnId, conf);
       scratchlcv = new LongColumnVector();
     }
 
@@ -1532,8 +1538,8 @@ class RecordReaderImpl implements Record
   private static class CharTreeReader extends StringTreeReader {
     int maxLength;
 
-    CharTreeReader(Path path, int columnId, int maxLength) {
-      super(path, columnId);
+    CharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
+      super(path, columnId, conf);
       this.maxLength = maxLength;
     }
 
@@ -1560,8 +1566,8 @@ class RecordReaderImpl implements Record
   private static class VarcharTreeReader extends StringTreeReader {
     int maxLength;
 
-    VarcharTreeReader(Path path, int columnId, int maxLength) {
-      super(path, columnId);
+    VarcharTreeReader(Path path, int columnId, int maxLength, Configuration conf) {
+      super(path, columnId, conf);
       this.maxLength = maxLength;
     }
 
@@ -1591,8 +1597,8 @@ class RecordReaderImpl implements Record
 
     StructTreeReader(Path path, int columnId,
                      List<OrcProto.Type> types,
-                     boolean[] included) throws IOException {
-      super(path, columnId);
+                     boolean[] included, Configuration conf) throws IOException {
+      super(path, columnId, conf);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getFieldNamesCount();
       this.fields = new TreeReader[fieldCount];
@@ -1600,7 +1606,7 @@ class RecordReaderImpl implements Record
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(path, subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included, conf);
         }
         this.fieldNames[i] = type.getFieldNames(i);
       }
@@ -1693,15 +1699,15 @@ class RecordReaderImpl implements Record
 
     UnionTreeReader(Path path, int columnId,
                     List<OrcProto.Type> types,
-                    boolean[] included) throws IOException {
-      super(path, columnId);
+                    boolean[] included, Configuration conf) throws IOException {
+      super(path, columnId, conf);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getSubtypesCount();
       this.fields = new TreeReader[fieldCount];
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(path, subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included, conf);
         }
       }
     }
@@ -1772,11 +1778,11 @@ class RecordReaderImpl implements Record
 
     ListTreeReader(Path path, int columnId,
                    List<OrcProto.Type> types,
-                   boolean[] included) throws IOException {
-      super(path, columnId);
+                   boolean[] included, Configuration conf) throws IOException {
+      super(path, columnId, conf);
       OrcProto.Type type = types.get(columnId);
       elementReader = createTreeReader(path, type.getSubtypes(0), types,
-          included);
+          included, conf);
     }
 
     @Override
@@ -1863,18 +1869,18 @@ class RecordReaderImpl implements Record
     MapTreeReader(Path path,
                   int columnId,
                   List<OrcProto.Type> types,
-                  boolean[] included) throws IOException {
-      super(path, columnId);
+                  boolean[] included, Configuration conf) throws IOException {
+      super(path, columnId, conf);
       OrcProto.Type type = types.get(columnId);
       int keyColumn = type.getSubtypes(0);
       int valueColumn = type.getSubtypes(1);
       if (included == null || included[keyColumn]) {
-        keyReader = createTreeReader(path, keyColumn, types, included);
+        keyReader = createTreeReader(path, keyColumn, types, included, conf);
       } else {
         keyReader = null;
       }
       if (included == null || included[valueColumn]) {
-        valueReader = createTreeReader(path, valueColumn, types, included);
+        valueReader = createTreeReader(path, valueColumn, types, included, conf);
       } else {
         valueReader = null;
       }
@@ -1956,54 +1962,55 @@ class RecordReaderImpl implements Record
   private static TreeReader createTreeReader(Path path,
                                              int columnId,
                                              List<OrcProto.Type> types,
-                                             boolean[] included
+                                             boolean[] included,
+                                             Configuration conf
                                             ) throws IOException {
     OrcProto.Type type = types.get(columnId);
     switch (type.getKind()) {
       case BOOLEAN:
-        return new BooleanTreeReader(path, columnId);
+        return new BooleanTreeReader(path, columnId, conf);
       case BYTE:
-        return new ByteTreeReader(path, columnId);
+        return new ByteTreeReader(path, columnId, conf);
       case DOUBLE:
-        return new DoubleTreeReader(path, columnId);
+        return new DoubleTreeReader(path, columnId, conf);
       case FLOAT:
-        return new FloatTreeReader(path, columnId);
+        return new FloatTreeReader(path, columnId, conf);
       case SHORT:
-        return new ShortTreeReader(path, columnId);
+        return new ShortTreeReader(path, columnId, conf);
       case INT:
-        return new IntTreeReader(path, columnId);
+        return new IntTreeReader(path, columnId, conf);
       case LONG:
-        return new LongTreeReader(path, columnId);
+        return new LongTreeReader(path, columnId, conf);
       case STRING:
-        return new StringTreeReader(path, columnId);
+        return new StringTreeReader(path, columnId, conf);
       case CHAR:
         if (!type.hasMaximumLength()) {
           throw new IllegalArgumentException("ORC char type has no length specified");
         }
-        return new CharTreeReader(path, columnId, type.getMaximumLength());
+        return new CharTreeReader(path, columnId, type.getMaximumLength(), conf);
       case VARCHAR:
         if (!type.hasMaximumLength()) {
           throw new IllegalArgumentException("ORC varchar type has no length specified");
         }
-        return new VarcharTreeReader(path, columnId, type.getMaximumLength());
+        return new VarcharTreeReader(path, columnId, type.getMaximumLength(), conf);
       case BINARY:
-        return new BinaryTreeReader(path, columnId);
+        return new BinaryTreeReader(path, columnId, conf);
       case TIMESTAMP:
-        return new TimestampTreeReader(path, columnId);
+        return new TimestampTreeReader(path, columnId, conf);
       case DATE:
-        return new DateTreeReader(path, columnId);
+        return new DateTreeReader(path, columnId, conf);
       case DECIMAL:
         int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
         int scale =  type.hasScale()? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
-        return new DecimalTreeReader(path, columnId, precision, scale);
+        return new DecimalTreeReader(path, columnId, precision, scale, conf);
       case STRUCT:
-        return new StructTreeReader(path, columnId, types, included);
+        return new StructTreeReader(path, columnId, types, included, conf);
       case LIST:
-        return new ListTreeReader(path, columnId, types, included);
+        return new ListTreeReader(path, columnId, types, included, conf);
       case MAP:
-        return new MapTreeReader(path, columnId, types, included);
+        return new MapTreeReader(path, columnId, types, included, conf);
       case UNION:
-        return new UnionTreeReader(path, columnId, types, included);
+        return new UnionTreeReader(path, columnId, types, included, conf);
       default:
         throw new IllegalArgumentException("Unsupported type " +
           type.getKind());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java Fri Feb 21 02:46:19 2014
@@ -20,6 +20,10 @@ package org.apache.hadoop.hive.ql.io.orc
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
 
@@ -34,10 +38,13 @@ class RunLengthIntegerReaderV2 implement
   private final long[] literals = new long[RunLengthIntegerWriterV2.MAX_SCOPE];
   private int numLiterals = 0;
   private int used = 0;
+  private final boolean skipCorrupt;
 
-  RunLengthIntegerReaderV2(InStream input, boolean signed) throws IOException {
+  RunLengthIntegerReaderV2(InStream input, boolean signed,
+      Configuration conf) throws IOException {
     this.input = input;
     this.signed = signed;
+    this.skipCorrupt = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA);
   }
 
   private void readValues() throws IOException {
@@ -163,14 +170,20 @@ class RunLengthIntegerReaderV2 implement
 
     // unpack the patch blob
     long[] unpackedPatch = new long[pl];
-    SerializationUtils.readInts(unpackedPatch, 0, pl, pw + pgw, input);
+
+    if ((pw + pgw) > 64 && !skipCorrupt) {
+      throw new IOException(ErrorMsg.ORC_CORRUPTED_READ.getMsg());
+    }
+    int bitSize = SerializationUtils.getClosestFixedBits(pw + pgw);
+    SerializationUtils.readInts(unpackedPatch, 0, pl, bitSize, input);
 
     // apply the patch directly when decoding the packed data
     int patchIdx = 0;
     long currGap = 0;
     long currPatch = 0;
+    long patchMask = ((1L << pw) - 1);
     currGap = unpackedPatch[patchIdx] >>> pw;
-    currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+    currPatch = unpackedPatch[patchIdx] & patchMask;
     long actualGap = 0;
 
     // special case: gap is >255 then patch value will be 0.
@@ -179,7 +192,7 @@ class RunLengthIntegerReaderV2 implement
       actualGap += 255;
       patchIdx++;
       currGap = unpackedPatch[patchIdx] >>> pw;
-      currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+      currPatch = unpackedPatch[patchIdx] & patchMask;
     }
     // add the left over gap
     actualGap += currGap;
@@ -199,7 +212,7 @@ class RunLengthIntegerReaderV2 implement
         if (patchIdx < pl) {
           // read the next gap and patch
           currGap = unpackedPatch[patchIdx] >>> pw;
-          currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+          currPatch = unpackedPatch[patchIdx] & patchMask;
           actualGap = 0;
 
           // special case: gap is >255 then patch will be 0. if gap is
@@ -208,7 +221,7 @@ class RunLengthIntegerReaderV2 implement
             actualGap += 255;
             patchIdx++;
             currGap = unpackedPatch[patchIdx] >>> pw;
-            currPatch = unpackedPatch[patchIdx] & ((1 << pw) - 1);
+            currPatch = unpackedPatch[patchIdx] & patchMask;
           }
           // add the left over gap
           actualGap += currGap;
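
In the hunks above, the reader now sizes the unpack via
SerializationUtils.getClosestFixedBits(pw + pgw) instead of trusting the raw sum,
throws ErrorMsg.ORC_CORRUPTED_READ when the combined width cannot fit in 64 bits
(unless hive.exec.orc.skip.corrupt.data is set), and widens the patch mask to a
long. An illustrative sketch of how one gap/patch entry is unpacked (not the
committed code verbatim; class name hypothetical):

    public class GapPatchDemo {
      // pw = patch width in bits (capped at 56 by the writer fix below);
      // the gap occupies the remaining high bits, at most 8 of them.
      static long[] unpackEntry(long entry, int pw) {
        long patchMask = (1L << pw) - 1;          // 64-bit shift
        return new long[] { entry >>> pw,         // gap since the previous patch
                            entry & patchMask };  // patch value
      }

      public static void main(String[] args) {
        int pw = 40;
        long entry = (3L << pw) | 0x12345678AL;   // gap = 3, patch = 0x12345678A
        long[] gp = unpackEntry(entry, pw);
        System.out.printf("gap=%d patch=%x%n", gp[0], gp[1]);
      }
    }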

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java Fri Feb 21 02:46:19 2014
@@ -567,6 +567,7 @@ class RunLengthIntegerWriterV2 implement
     // since we are considering only 95 percentile, the size of gap and
     // patch array can contain only be 5% values
     patchLength = (int) Math.ceil((baseRedLiterals.length * 0.05));
+
     int[] gapList = new int[patchLength];
     long[] patchList = new long[patchLength];
 
@@ -574,6 +575,15 @@ class RunLengthIntegerWriterV2 implement
     patchWidth = brBits100p - brBits95p;
     patchWidth = SerializationUtils.getClosestFixedBits(patchWidth);
 
+    // if patch bit requirement is 64 then it will not possible to pack
+    // gap and patch together in a long. To make sure gap and patch can be
+    // packed together adjust the patch width
+    if (patchWidth == 64) {
+      patchWidth = 56;
+      brBits95p = 8;
+      mask = (1L << brBits95p) - 1;
+    }
+
     int gapIdx = 0;
     int patchIdx = 0;
     int prev = 0;
@@ -642,7 +652,7 @@ class RunLengthIntegerWriterV2 implement
       long g = gapList[gapIdx++];
       long p = patchList[patchIdx++];
       while (g > 255) {
-        gapVsPatchList[i++] = (255 << patchWidth) | 0;
+        gapVsPatchList[i++] = (255L << patchWidth);
         g -= 255;
       }
 
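
The arithmetic behind the 56-bit cap above: each gap/patch entry is written as
(gap << patchWidth) | patch and must fit in one 64-bit long. Since gaps larger
than 255 are split into filler entries (the "(255L << patchWidth)" line), the gap
field never needs more than 8 bits, leaving 64 - 8 = 56 bits for the patch; with
brBits95p narrowed to 8, the stored 8 low bits plus a 56-bit patch still cover a
full 64-bit value. A small illustrative check (not part of this commit; class
name hypothetical):

    public class PatchPackingDemo {
      public static void main(String[] args) {
        long gap = 200;                       // gaps are split at 255, so <= 8 bits
        int patchWidth = 56;                  // the new cap: 64 - 8
        long patch = (1L << patchWidth) - 1;  // widest patch the layout can carry
        long entry = (gap << patchWidth) | patch;
        System.out.println((entry >>> patchWidth) == gap);                // true
        System.out.println((entry & ((1L << patchWidth) - 1)) == patch);  // true
      }
    }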

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java Fri Feb 21 02:46:19 2014
@@ -151,16 +151,16 @@ public class VectorizedOrcInputFormat ex
 
     if(!(fSplit instanceof OrcSplit)){
       //If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
-      reader = OrcFile.createReader(fs, path);
+      reader = OrcFile.createReader(fs, path, conf);
     } else {
       //We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
       //constructor
       OrcSplit orcSplit = (OrcSplit) fSplit;
       if (orcSplit.hasFooter()) {
         FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
-        reader = OrcFile.createReader(fs, path, fMetaInfo);
+        reader = OrcFile.createReader(fs, path, fMetaInfo, conf);
       } else {
-        reader = OrcFile.createReader(fs, path);
+        reader = OrcFile.createReader(fs, path, conf);
       }
     }
 
@@ -176,7 +176,7 @@ public class VectorizedOrcInputFormat ex
     }
     for (FileStatus file : files) {
       try {
-        OrcFile.createReader(fs, file.getPath());
+        OrcFile.createReader(fs, file.getPath(), conf);
       } catch (IOException e) {
         return false;
       }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestIntegerCompressionReader.java Fri Feb 21 02:46:19 2014
@@ -22,6 +22,7 @@ import static junit.framework.Assert.ass
 import java.nio.ByteBuffer;
 import java.util.Random;
 
+import org.apache.hadoop.conf.Configuration;
 import org.junit.Test;
 
 public class TestIntegerCompressionReader {
@@ -57,7 +58,8 @@ public class TestIntegerCompressionReade
       new RunLengthIntegerReaderV2(InStream.create
                                    ("test", new ByteBuffer[]{inBuf},
                                     new long[]{0}, inBuf.remaining(),
-                                    codec, 1000), true);
+                                    codec, 1000), true,
+                                    new Configuration());
     for(int i=0; i < 2048; ++i) {
       int x = (int) in.next();
       if (i < 1024) {
@@ -112,7 +114,8 @@ public class TestIntegerCompressionReade
                                                    new ByteBuffer[]{inBuf},
                                                    new long[]{0},
                                                    inBuf.remaining(),
-                                                   null, 100), true);
+                                                   null, 100), true,
+                                                   new Configuration());
     for(int i=0; i < 2048; i += 10) {
       int x = (int) in.next();
       if (i < 1024) {

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java Fri Feb 21 02:46:19 2014
@@ -191,7 +191,7 @@ public class TestNewInputOutputFormat {
     
     Path outputFilePath = new Path(outputPath, "part-m-00000");
     assertTrue(localFs.exists(outputFilePath));
-    Reader reader = OrcFile.createReader(localFs, outputFilePath);
+    Reader reader = OrcFile.createReader(localFs, outputFilePath, conf);
     assertTrue(reader.getNumberOfRows() == rownum);
     assertEquals(reader.getCompression(), CompressionKind.ZLIB);
     StructObjectInspector soi =
@@ -248,7 +248,7 @@ public class TestNewInputOutputFormat {
     assertTrue(result);
     
     Path outputFilePath = new Path(outputPath, "part-m-00000");
-    Reader reader = OrcFile.createReader(localFs, outputFilePath);
+    Reader reader = OrcFile.createReader(localFs, outputFilePath, conf);
     assertEquals(reader.getCompression(), CompressionKind.SNAPPY);
     
     localFs.delete(outputPath, true);
@@ -351,7 +351,7 @@ public class TestNewInputOutputFormat {
     assertTrue(result);
     
     Path outputFilePath = new Path(outputPath, "part-r-00000");
-    Reader reader = OrcFile.createReader(localFs, outputFilePath);
+    Reader reader = OrcFile.createReader(localFs, outputFilePath, conf);
     
     RecordReader rows = reader.rows(null);
     ObjectInspector orcOi = reader.getObjectInspector();

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java Fri Feb 21 02:46:19 2014
@@ -128,7 +128,7 @@ public class TestNewIntegerEncoding {
     writer.addRow(new Row(111, 1111L));
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     while (rows.hasNext()) {
       Object row = rows.next(null);
@@ -162,7 +162,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -197,7 +197,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -228,7 +228,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -259,7 +259,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -290,7 +290,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -321,7 +321,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -351,7 +351,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -382,7 +382,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -413,7 +413,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -444,7 +444,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -478,7 +478,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -512,7 +512,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -554,7 +554,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -596,7 +596,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -638,7 +638,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -671,7 +671,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -706,7 +706,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -741,7 +741,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -775,7 +775,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -809,7 +809,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -843,7 +843,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -877,7 +877,166 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+    RecordReader rows = reader.rows(null);
+    int idx = 0;
+    while (rows.hasNext()) {
+      Object row = rows.next(null);
+      assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax1() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for (int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(60));
+    }
+    input.set(511, Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+    for (Long l : input) {
+      writer.addRow(l);
+    }
+    writer.close();
+
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+    RecordReader rows = reader.rows(null);
+    int idx = 0;
+    while (rows.hasNext()) {
+      Object row = rows.next(null);
+      assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax2() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for (int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(60));
+    }
+    input.set(128, Long.MAX_VALUE);
+    input.set(256, Long.MAX_VALUE);
+    input.set(511, Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+    for (Long l : input) {
+      writer.addRow(l);
+    }
+    writer.close();
+
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+    RecordReader rows = reader.rows(null);
+    int idx = 0;
+    while (rows.hasNext()) {
+      Object row = rows.next(null);
+      assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax3() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+
+    List<Long> input = Lists.newArrayList();
+    input.add(371946367L);
+    input.add(11963367L);
+    input.add(68639400007L);
+    input.add(100233367L);
+    input.add(6367L);
+    input.add(10026367L);
+    input.add(3670000L);
+    input.add(3602367L);
+    input.add(4719226367L);
+    input.add(7196367L);
+    input.add(444442L);
+    input.add(210267L);
+    input.add(21033L);
+    input.add(160267L);
+    input.add(400267L);
+    input.add(23634347L);
+    input.add(16027L);
+    input.add(46026367L);
+    input.add(Long.MAX_VALUE);
+    input.add(33333L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+    for (Long l : input) {
+      writer.addRow(l);
+    }
+    writer.close();
+
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
+    RecordReader rows = reader.rows(null);
+    int idx = 0;
+    while (rows.hasNext()) {
+      Object row = rows.next(null);
+      assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get());
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax4() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
+          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+
+    List<Long> input = Lists.newArrayList();
+    for (int i = 0; i < 25; i++) {
+      input.add(371292224226367L);
+      input.add(119622332222267L);
+      input.add(686329400222007L);
+      input.add(100233333222367L);
+      input.add(636272333322222L);
+      input.add(10202633223267L);
+      input.add(36700222022230L);
+      input.add(36023226224227L);
+      input.add(47192226364427L);
+      input.add(71963622222447L);
+      input.add(22244444222222L);
+      input.add(21220263327442L);
+      input.add(21032233332232L);
+      input.add(16026322232227L);
+      input.add(40022262272212L);
+      input.add(23634342227222L);
+      input.add(16022222222227L);
+      input.add(46026362222227L);
+      input.add(46026362222227L);
+      input.add(33322222222323L);
+    }
+    input.add(Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+    for (Long l : input) {
+      writer.addRow(l);
+    }
+    writer.close();
+
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -938,7 +1097,7 @@ public class TestNewIntegerEncoding {
 
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 0;
     while (rows.hasNext()) {
@@ -966,7 +1125,7 @@ public class TestNewIntegerEncoding {
     writer.addRow(-5535739865598783616L);
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     Object row = rows.next(null);
     assertEquals(-7486502418706614742L, ((LongWritable) row).get());
@@ -1005,7 +1164,7 @@ public class TestNewIntegerEncoding {
     }
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     int idx = 55555;
     rows.seekToRow(idx);

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Fri Feb 21 02:46:19 2014
@@ -207,7 +207,7 @@ public class TestOrcFile {
   @Test
   public void testReadFormat_0_11() throws Exception {
     Path oldFilePath = new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
-    Reader reader = OrcFile.createReader(fs, oldFilePath);
+    Reader reader = OrcFile.createReader(fs, oldFilePath, conf);
 
     int stripeCount = 0;
     int rowCount = 0;
@@ -470,7 +470,7 @@ public class TestOrcFile {
     writer.addRow(new SimpleStruct(bytes(0,1,2,3,4,5), null));
     writer.addRow(new SimpleStruct(null, "hi"));
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
 
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
@@ -565,7 +565,7 @@ public class TestOrcFile {
     }
 
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     Metadata metadata = reader.getMetadata();
     int numStripes = metadata.getStripeStatistics().size();
     assertEquals(3, numStripes);
@@ -643,7 +643,7 @@ public class TestOrcFile {
         list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
         map(inner(5, "chani"), inner(1, "mauddib"))));
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
 
     Metadata metadata = reader.getMetadata();
 
@@ -893,7 +893,7 @@ public class TestOrcFile {
       writer.addRow(inner(x, y));
     }
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
 
     // check out the statistics
     ColumnStatistics[] stats = reader.getStatistics();
@@ -957,7 +957,7 @@ public class TestOrcFile {
                                          .compress(CompressionKind.NONE)
                                          .bufferSize(100));
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(false, reader.rows(null).hasNext());
     assertEquals(CompressionKind.NONE, reader.getCompression());
     assertEquals(0, reader.getNumberOfRows());
@@ -994,7 +994,7 @@ public class TestOrcFile {
         null, null, null, null));
     writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
     assertEquals(byteBuf(1,2,3,4,5,6,7,-1,-2,127,-128),
         reader.getMetadataValue("my.meta"));
@@ -1112,7 +1112,7 @@ public class TestOrcFile {
     union.set((byte) 0, new IntWritable(138));
     writer.addRow(row);
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
     assertEquals(5309, reader.getNumberOfRows());
     DecimalColumnStatistics stats =
@@ -1243,7 +1243,7 @@ public class TestOrcFile {
           Integer.toHexString(rand.nextInt())));
     }
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     RecordReader rows = reader.rows(null);
     rand = new Random(12);
     OrcStruct row = null;
@@ -1286,7 +1286,7 @@ public class TestOrcFile {
       }
     }
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(50000, reader.getNumberOfRows());
     assertEquals(0, reader.getRowIndexStride());
     StripeInformation stripe = reader.getStripes().iterator().next();
@@ -1350,7 +1350,7 @@ public class TestOrcFile {
     }
     writer.close();
     writer = null;
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(COUNT, reader.getNumberOfRows());
     RecordReader rows = reader.rows(null);
     OrcStruct row = null;
@@ -1517,7 +1517,7 @@ public class TestOrcFile {
     }
     writer.close();
     assertEquals(null, memory.path);
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     int i = 0;
     for(StripeInformation stripe: reader.getStripes()) {
       i += 1;
@@ -1542,7 +1542,7 @@ public class TestOrcFile {
       writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
     }
     writer.close();
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(3500, reader.getNumberOfRows());
 
     SearchArgument sarg = SearchArgument.FACTORY.newBuilder()

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java Fri Feb 21 02:46:19 2014
@@ -108,7 +108,7 @@ public class TestOrcNullOptimization {
                                Lists.newArrayList(new InnerStruct(100))));
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
     assertEquals(20000, reader.getNumberOfRows());
@@ -212,7 +212,7 @@ public class TestOrcNullOptimization {
                                Lists.newArrayList(new InnerStruct(100))));
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
     assertEquals(20000, reader.getNumberOfRows());
@@ -313,7 +313,7 @@ public class TestOrcNullOptimization {
                                Lists.newArrayList(new InnerStruct(100))));
     writer.close();
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
     assertEquals(8, reader.getNumberOfRows());

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSerDeStats.java Fri Feb 21 02:46:19 2014
@@ -212,7 +212,7 @@ public class TestOrcSerDeStats {
     writer.close();
     assertEquals(4, writer.getNumberOfRows());
     assertEquals(273, writer.getRawDataSize());
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     assertEquals(4, reader.getNumberOfRows());
     assertEquals(273, reader.getRawDataSize());
     assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
@@ -310,7 +310,7 @@ public class TestOrcSerDeStats {
     assertEquals(5000, writer.getNumberOfRows());
     assertEquals(430000000, writer.getRawDataSize());
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     // stats from reader
     assertEquals(5000, reader.getNumberOfRows());
     assertEquals(430000000, reader.getRawDataSize());
@@ -341,7 +341,7 @@ public class TestOrcSerDeStats {
     assertEquals(1000, writer.getNumberOfRows());
     assertEquals(950000, writer.getRawDataSize());
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     // stats from reader
     assertEquals(1000, reader.getNumberOfRows());
     assertEquals(950000, reader.getRawDataSize());
@@ -372,7 +372,7 @@ public class TestOrcSerDeStats {
     assertEquals(1000, writer.getNumberOfRows());
     assertEquals(44500, writer.getRawDataSize());
 
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
     // stats from reader
     assertEquals(1000, reader.getNumberOfRows());
     assertEquals(44500, reader.getRawDataSize());
@@ -413,7 +413,7 @@ public class TestOrcSerDeStats {
     long rawDataSize = writer.getRawDataSize();
     assertEquals(2, rowCount);
     assertEquals(1740, rawDataSize);
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
 
     assertEquals(2, reader.getNumberOfRows());
     assertEquals(1740, reader.getRawDataSize());
@@ -506,7 +506,7 @@ public class TestOrcSerDeStats {
     long rawDataSize = writer.getRawDataSize();
     assertEquals(2, rowCount);
     assertEquals(1740, rawDataSize);
-    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
 
     assertEquals(2, reader.getNumberOfRows());
     assertEquals(1740, reader.getRawDataSize());
@@ -573,7 +573,7 @@ public class TestOrcSerDeStats {
   @Test(expected = ClassCastException.class)
   public void testSerdeStatsOldFormat() throws Exception {
     Path oldFilePath = new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
-    Reader reader = OrcFile.createReader(fs, oldFilePath);
+    Reader reader = OrcFile.createReader(fs, oldFilePath, conf);
 
     int stripeCount = 0;
     int rowCount = 0;

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java?rev=1570426&r1=1570425&r2=1570426&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java Fri Feb 21 02:46:19 2014
@@ -131,8 +131,8 @@ public class TestVectorizedORCReader {
 
   private void checkVectorizedReader() throws Exception {
 
-    Reader vreader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath);
-    Reader reader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath);
+    Reader vreader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath, conf);
+    Reader reader = OrcFile.createReader(testFilePath.getFileSystem(conf), testFilePath, conf);
     RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows(null);
     RecordReaderImpl rr = (RecordReaderImpl) reader.rows(null);
     VectorizedRowBatch batch = null;

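The reason the Configuration must travel with the reader: with the new flag enabled, the record reader can step over values written by the broken encoder instead of failing the whole read. A hedged end-to-end sketch; the property name comes from the HiveConf change in this commit, and everything else mirrors the read loop used in the tests above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.ql.io.orc.RecordReader;

    public class SkipCorruptRead {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // opt in to skipping data written by the corrupt encoder (default: false)
        conf.setBoolean("hive.exec.orc.skip.corrupt.data", true);
        Path orcPath = new Path(args[0]);
        FileSystem fs = orcPath.getFileSystem(conf);
        Reader reader = OrcFile.createReader(fs, orcPath, conf);
        RecordReader rows = reader.rows(null);
        Object row = null;
        while (rows.hasNext()) {
          row = rows.next(row); // reuse the row object between calls
        }
        rows.close();
      }
    }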

