hive-commits mailing list archives

From gunt...@apache.org
Subject svn commit: r1518830 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/resources/
Date Thu, 29 Aug 2013 21:23:02 GMT
Author: gunther
Date: Thu Aug 29 21:23:02 2013
New Revision: 1518830

URL: http://svn.apache.org/r1518830
Log:
HIVE-5091: ORC files should have an option to pad stripes to the HDFS block boundaries (Owen O'Malley via Gunther Hagleitner)

Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
    hive/trunk/ql/src/test/resources/orc-file-dump.out
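
This patch replaces the positional OrcFile.createWriter arguments with a
WriterOptions builder and adds an "orc.block.padding" table property plus a
blockPadding(...) option. A minimal sketch of the new API, assuming a
hypothetical row class MyRow and output path (neither is part of the patch;
imports mirror the test changes below):

    Configuration conf = new Configuration();
    ObjectInspector inspector = ObjectInspectorFactory
        .getReflectionObjectInspector(MyRow.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
        OrcFile.writerOptions(conf)
            .inspector(inspector)            // required; defines the schema
            .stripeSize(64L * 1024 * 1024)   // 64MB stripes
            .compress(CompressionKind.ZLIB)
            .blockPadding(true));            // pad stripes to block boundaries
    // writer.addRow(...); writer.close();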

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Aug 29 21:23:02 2013
@@ -505,9 +505,8 @@ public class HiveConf extends Configurat
 
     // Maximum fraction of heap that can be used by ORC file writers
     HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f), // 50%
-    // use 0.11 version of RLE encoding. if this conf is not defined or any
-    // other value specified, ORC will use the new RLE encoding
-    HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", "0.11"),
+    // Define the version of the file to write
+    HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", null),
 
     HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f),
 

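Because the default is now null, WriterOptions resolves the format to
Version.CURRENT (0.12) unless the property names a specific version. A sketch
of pinning the 0.11 format through the configuration, which has the same
effect as the new WriterOptions.version(OrcFile.Version.V_0_11) call used in
the tests below:

    Configuration conf = new Configuration();
    conf.set(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname, "0.11");
    // OrcFile.writerOptions(conf) now selects Version.V_0_11; any name that
    // Version.byName(...) does not recognize raises IllegalArgumentException.
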
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Thu Aug 29 21:23:02 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.io.orc
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
 import java.io.IOException;
@@ -47,19 +48,67 @@ public final class OrcFile {
    * prevent the new reader from reading ORC files generated by any released
    * version of Hive.
    */
-  public static final int MAJOR_VERSION = 0;
-  public static final int MINOR_VERSION = 11;
+  public static enum Version {
+    V_0_11("0.11", 0, 11),
+    V_0_12("0.12", 0, 12);
+
+    public static final Version CURRENT = V_0_12;
+
+    private final String name;
+    private final int major;
+    private final int minor;
+
+    private Version(String name, int major, int minor) {
+      this.name = name;
+      this.major = major;
+      this.minor = minor;
+    }
+
+    public static Version byName(String name) {
+      for(Version version: values()) {
+        if (version.name.equals(name)) {
+          return version;
+        }
+      }
+      throw new IllegalArgumentException("Unknown ORC version " + name);
+    }
+
+    /**
+     * Get the human-readable name for the version.
+     */
+    public String getName() {
+      return name;
+    }
+
+    /**
+     * Get the major version number.
+     */
+    public int getMajor() {
+      return major;
+    }
+
+    /**
+     * Get the minor version number.
+     */
+    public int getMinor() {
+      return minor;
+    }
+  }
 
   // the table properties that control ORC files
   public static final String COMPRESSION = "orc.compress";
-  static final String DEFAULT_COMPRESSION = "ZLIB";
   public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
-  static final String DEFAULT_COMPRESSION_BLOCK_SIZE = "262144";
   public static final String STRIPE_SIZE = "orc.stripe.size";
-  static final String DEFAULT_STRIPE_SIZE = "268435456";
   public static final String ROW_INDEX_STRIDE = "orc.row.index.stride";
-  static final String DEFAULT_ROW_INDEX_STRIDE = "10000";
   public static final String ENABLE_INDEXES = "orc.create.index";
+  public static final String BLOCK_PADDING = "orc.block.padding";
+
+  static final long DEFAULT_STRIPE_SIZE = 256 * 1024 * 1024;
+  static final CompressionKind DEFAULT_COMPRESSION_KIND =
+    CompressionKind.ZLIB;
+  static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
+  static final int DEFAULT_ROW_INDEX_STRIDE = 10000;
+  static final boolean DEFAULT_BLOCK_PADDING = true;
 
   // unused
   private OrcFile() {}
@@ -77,7 +126,145 @@ public final class OrcFile {
   }
 
   /**
-   * Create an ORC file streamFactory.
+   * Options for creating ORC file writers.
+   */
+  public static class WriterOptions {
+    private final Configuration configuration;
+    private FileSystem fileSystemValue = null;
+    private ObjectInspector inspectorValue = null;
+    private long stripeSizeValue = DEFAULT_STRIPE_SIZE;
+    private int rowIndexStrideValue = DEFAULT_ROW_INDEX_STRIDE;
+    private int bufferSizeValue = DEFAULT_BUFFER_SIZE;
+    private boolean blockPaddingValue = DEFAULT_BLOCK_PADDING;
+    private CompressionKind compressValue = DEFAULT_COMPRESSION_KIND;
+    private MemoryManager memoryManagerValue;
+    private Version versionValue;
+
+    WriterOptions(Configuration conf) {
+      configuration = conf;
+      memoryManagerValue = getMemoryManager(conf);
+      String versionName =
+        conf.get(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname);
+      if (versionName == null) {
+        versionValue = Version.CURRENT;
+      } else {
+        versionValue = Version.byName(versionName);
+      }
+    }
+
+    /**
+     * Provide the filesystem for the path, if the client has it available.
+     * If it is not provided, it will be found from the path.
+     */
+    public WriterOptions fileSystem(FileSystem value) {
+      fileSystemValue = value;
+      return this;
+    }
+
+    /**
+     * Set the stripe size for the file. The writer stores the contents of the
+     * stripe in memory until this memory limit is reached; the stripe is then
+     * flushed to the HDFS file and the next stripe is started.
+     */
+    public WriterOptions stripeSize(long value) {
+      stripeSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Set the distance between entries in the row index. The minimum value is
+     * 1000 to prevent the index from overwhelming the data. If the stride is
+     * set to 0, no indexes will be included in the file.
+     */
+    public WriterOptions rowIndexStride(int value) {
+      rowIndexStrideValue = value;
+      return this;
+    }
+
+    /**
+     * The size of the memory buffers used for compressing and storing the
+     * stripe in memory.
+     */
+    public WriterOptions bufferSize(int value) {
+      bufferSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Sets whether the HDFS blocks are padded to prevent stripes from
+     * straddling blocks. Padding improves locality and thus the speed of
+     * reading, but costs space.
+     */
+    public WriterOptions blockPadding(boolean value) {
+      blockPaddingValue = value;
+      return this;
+    }
+
+    /**
+     * Sets the generic compression that is used to compress the data.
+     */
+    public WriterOptions compress(CompressionKind value) {
+      compressValue = value;
+      return this;
+    }
+
+    /**
+     * A required option that sets the object inspector for the rows. Used
+     * to determine the schema for the file.
+     */
+    public WriterOptions inspector(ObjectInspector value) {
+      inspectorValue = value;
+      return this;
+    }
+
+    /**
+     * Sets the version of the file that will be written.
+     */
+    public WriterOptions version(Version value) {
+      versionValue = value;
+      return this;
+    }
+
+    /**
+     * A package local option to set the memory manager.
+     */
+    WriterOptions memory(MemoryManager value) {
+      memoryManagerValue = value;
+      return this;
+    }
+  }
+
+  /**
+   * Create a default set of write options that can be modified.
+   */
+  public static WriterOptions writerOptions(Configuration conf) {
+    return new WriterOptions(conf);
+  }
+
+  /**
+   * Create an ORC file writer. This is the public interface for creating
+   * writers going forward and new options will only be added to this method.
+   * @param path filename to write to
+   * @param options the options
+   * @return a new ORC file writer
+   * @throws IOException
+   */
+  public static Writer createWriter(Path path,
+                                    WriterOptions opts
+                                    ) throws IOException {
+    FileSystem fs = opts.fileSystemValue == null ?
+      path.getFileSystem(opts.configuration) : opts.fileSystemValue;
+
+    return new WriterImpl(fs, path, opts.configuration, opts.inspectorValue,
+                          opts.stripeSizeValue, opts.compressValue,
+                          opts.bufferSizeValue, opts.rowIndexStrideValue,
+                          opts.memoryManagerValue, opts.blockPaddingValue,
+                          opts.versionValue);
+  }
+
+  /**
+   * Create an ORC file writer. This method is provided for API backward
+   * compatibility with Hive 0.11.
    * @param fs file system
    * @param path filename to write to
    * @param inspector the ObjectInspector that inspects the rows
@@ -86,7 +273,7 @@ public final class OrcFile {
    * @param bufferSize the number of bytes to compress at once
    * @param rowIndexStride the number of rows between row index entries or
    *                       0 to suppress all indexes
-   * @return a new ORC file streamFactory
+   * @return a new ORC file writer
    * @throws IOException
    */
   public static Writer createWriter(FileSystem fs,
@@ -97,8 +284,14 @@ public final class OrcFile {
                                     CompressionKind compress,
                                     int bufferSize,
                                     int rowIndexStride) throws IOException {
-    return new WriterImpl(fs, path, conf, inspector, stripeSize, compress,
-      bufferSize, rowIndexStride, getMemoryManager(conf));
+    return createWriter(path,
+                        writerOptions(conf)
+                        .fileSystem(fs)
+                        .inspector(inspector)
+                        .stripeSize(stripeSize)
+                        .compress(compress)
+                        .bufferSize(bufferSize)
+                        .rowIndexStride(rowIndexStride));
   }
 
   private static MemoryManager memoryManager = null;

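The Hive 0.11 createWriter entry point is retained but now just delegates to
the builder form, so these two calls are equivalent (fs, path, conf, and
inspector are placeholders):

    // old positional API, kept for backward compatibility
    Writer a = OrcFile.createWriter(fs, path, conf, inspector,
        100000, CompressionKind.NONE, 10000, 10000);

    // equivalent builder API
    Writer b = OrcFile.createWriter(path,
        OrcFile.writerOptions(conf)
            .fileSystem(fs)
            .inspector(inspector)
            .stripeSize(100000)
            .compress(CompressionKind.NONE)
            .bufferSize(10000)
            .rowIndexStride(10000));
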
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java Thu Aug 29 21:23:02 2013
@@ -47,32 +47,20 @@ public class OrcOutputFormat extends Fil
       implements RecordWriter<NullWritable, OrcSerdeRow>,
                  FileSinkOperator.RecordWriter {
     private Writer writer = null;
-    private final FileSystem fs;
     private final Path path;
-    private final Configuration conf;
-    private final long stripeSize;
-    private final int compressionSize;
-    private final CompressionKind compress;
-    private final int rowIndexStride;
-
-    OrcRecordWriter(FileSystem fs, Path path, Configuration conf,
-                    String stripeSize, String compress,
-                    String compressionSize, String rowIndexStride) {
-      this.fs = fs;
+    private final OrcFile.WriterOptions options;
+
+    OrcRecordWriter(Path path, OrcFile.WriterOptions options) {
       this.path = path;
-      this.conf = conf;
-      this.stripeSize = Long.valueOf(stripeSize);
-      this.compress = CompressionKind.valueOf(compress);
-      this.compressionSize = Integer.valueOf(compressionSize);
-      this.rowIndexStride = Integer.valueOf(rowIndexStride);
+      this.options = options;
     }
 
     @Override
     public void write(NullWritable nullWritable,
                       OrcSerdeRow row) throws IOException {
       if (writer == null) {
-        writer = OrcFile.createWriter(fs, path, this.conf, row.getInspector(),
-            stripeSize, compress, compressionSize, rowIndexStride);
+        options.inspector(row.getInspector());
+        writer = OrcFile.createWriter(path, options);
       }
       writer.addRow(row.getRow());
     }
@@ -81,9 +69,8 @@ public class OrcOutputFormat extends Fil
     public void write(Writable row) throws IOException {
       OrcSerdeRow serdeRow = (OrcSerdeRow) row;
       if (writer == null) {
-        writer = OrcFile.createWriter(fs, path, this.conf,
-            serdeRow.getInspector(), stripeSize, compress, compressionSize,
-            rowIndexStride);
+        options.inspector(serdeRow.getInspector());
+        writer = OrcFile.createWriter(path, options);
       }
       writer.addRow(serdeRow.getRow());
     }
@@ -102,8 +89,8 @@ public class OrcOutputFormat extends Fil
         ObjectInspector inspector = ObjectInspectorFactory.
             getStandardStructObjectInspector(new ArrayList<String>(),
                 new ArrayList<ObjectInspector>());
-        writer = OrcFile.createWriter(fs, path, this.conf, inspector,
-            stripeSize, compress, compressionSize, rowIndexStride);
+        options.inspector(inspector);
+        writer = OrcFile.createWriter(path, options);
       }
       writer.close();
     }
@@ -113,9 +100,8 @@ public class OrcOutputFormat extends Fil
   public RecordWriter<NullWritable, OrcSerdeRow>
       getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
                       Progressable reporter) throws IOException {
-    return new OrcRecordWriter(fileSystem,  new Path(name), conf,
-      OrcFile.DEFAULT_STRIPE_SIZE, OrcFile.DEFAULT_COMPRESSION,
-      OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE, OrcFile.DEFAULT_ROW_INDEX_STRIDE);
+    return new
+      OrcRecordWriter(new Path(name), OrcFile.writerOptions(conf));
   }
 
   @Override
@@ -126,20 +112,42 @@ public class OrcOutputFormat extends Fil
                          boolean isCompressed,
                          Properties tableProperties,
                          Progressable reporter) throws IOException {
-    String stripeSize = tableProperties.getProperty(OrcFile.STRIPE_SIZE,
-        OrcFile.DEFAULT_STRIPE_SIZE);
-    String compression = tableProperties.getProperty(OrcFile.COMPRESSION,
-        OrcFile.DEFAULT_COMPRESSION);
-    String compressionSize =
-      tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE,
-        OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE);
-    String rowIndexStride =
-        tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE,
-            OrcFile.DEFAULT_ROW_INDEX_STRIDE);
-    if ("false".equals(tableProperties.getProperty(OrcFile.ENABLE_INDEXES))) {
-      rowIndexStride = "0";
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf);
+    if (tableProperties.containsKey(OrcFile.STRIPE_SIZE)) {
+      options.stripeSize(Long.parseLong
+                           (tableProperties.getProperty(OrcFile.STRIPE_SIZE)));
+    }
+
+    if (tableProperties.containsKey(OrcFile.COMPRESSION)) {
+      options.compress(CompressionKind.valueOf
+                           (tableProperties.getProperty(OrcFile.COMPRESSION)));
+    }
+
+    if (tableProperties.containsKey(OrcFile.COMPRESSION_BLOCK_SIZE)) {
+      options.bufferSize(Integer.parseInt
+                         (tableProperties.getProperty
+                            (OrcFile.COMPRESSION_BLOCK_SIZE)));
+    }
+
+    if (tableProperties.containsKey(OrcFile.ROW_INDEX_STRIDE)) {
+      options.rowIndexStride(Integer.parseInt
+                             (tableProperties.getProperty
+                              (OrcFile.ROW_INDEX_STRIDE)));
     }
-    return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
-      stripeSize, compression, compressionSize, rowIndexStride);
+
+    if (tableProperties.containsKey(OrcFile.ENABLE_INDEXES)) {
+      if ("false".equals(tableProperties.getProperty
+                         (OrcFile.ENABLE_INDEXES))) {
+        options.rowIndexStride(0);
+      }
+    }
+
+    if (tableProperties.containsKey(OrcFile.BLOCK_PADDING)) {
+      options.blockPadding(Boolean.parseBoolean
+                           (tableProperties.getProperty
+                            (OrcFile.BLOCK_PADDING)));
+    }
+
+    return new OrcRecordWriter(path, options);
   }
 }

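getHiveRecordWriter now applies each table property only when it is present,
parsing it into the typed option; anything unset falls back to the
WriterOptions defaults (256MB stripes, ZLIB, 256KB buffers, stride 10000,
padding on). A sketch with hypothetical property values:

    Properties tblProps = new Properties();
    tblProps.setProperty(OrcFile.STRIPE_SIZE, "134217728");  // stripeSize(128MB)
    tblProps.setProperty(OrcFile.COMPRESSION, "SNAPPY");     // compress(SNAPPY)
    tblProps.setProperty(OrcFile.ENABLE_INDEXES, "false");   // rowIndexStride(0)
    tblProps.setProperty(OrcFile.BLOCK_PADDING, "false");    // blockPadding(false)
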
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Thu Aug 29 21:23:02 2013
@@ -248,11 +248,13 @@ final class ReaderImpl implements Reader
       if (version.size() >= 2) {
         minor = version.get(1);
       }
-      if (major > OrcFile.MAJOR_VERSION ||
-          (major == OrcFile.MAJOR_VERSION && minor > OrcFile.MINOR_VERSION)) {
-        log.warn("ORC file " + path + " was written by a future Hive version " +
-            versionString(version) + ". This file may not be readable by " +
-            "this version of Hive.");
+      if (major > OrcFile.Version.CURRENT.getMajor() ||
+          (major == OrcFile.Version.CURRENT.getMajor() &&
+           minor > OrcFile.Version.CURRENT.getMinor())) {
+        log.warn("ORC file " + path +
+                 " was written by a future Hive version " +
+                 versionString(version) +
+                 ". This file may not be readable by this version of Hive.");
       }
     }
   }

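The reader now compares the file's (major, minor) pair against
OrcFile.Version.CURRENT instead of fixed constants. A sketch of the check with
a hypothetical future file version:

    int major = 0, minor = 13;  // hypothetical file stamped by a future Hive
    boolean fromFuture =
        major > OrcFile.Version.CURRENT.getMajor() ||
        (major == OrcFile.Version.CURRENT.getMajor() &&
         minor > OrcFile.Version.CURRENT.getMinor());
    // true here, so the "may not be readable" warning is logged;
    // files stamped 0.12 or 0.11 pass silently.
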
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Thu Aug 29 21:23:02 2013
@@ -82,13 +82,18 @@ class WriterImpl implements Writer, Memo
   private static final int HDFS_BUFFER_SIZE = 256 * 1024;
   private static final int MIN_ROW_INDEX_STRIDE = 1000;
 
+  // HDFS requires blocks < 2GB and multiples of 512, so pick 1.5GB
+  private static final long MAX_BLOCK_SIZE = 1536 * 1024 * 1024;
+
   private final FileSystem fs;
   private final Path path;
   private final long stripeSize;
   private final int rowIndexStride;
   private final CompressionKind compress;
   private final CompressionCodec codec;
+  private final boolean addBlockPadding;
   private final int bufferSize;
+  private final long blockSize;
   // the streams that make up the current stripe
   private final Map<StreamName, BufferedStream> streams =
     new TreeMap<StreamName, BufferedStream>();
@@ -113,6 +118,7 @@ class WriterImpl implements Writer, Memo
       OrcProto.RowIndex.newBuilder();
   private final boolean buildIndex;
   private final MemoryManager memoryManager;
+  private final OrcFile.Version version;
 
   private final Configuration conf;
 
@@ -124,11 +130,17 @@ class WriterImpl implements Writer, Memo
              CompressionKind compress,
              int bufferSize,
              int rowIndexStride,
-             MemoryManager memoryManager) throws IOException {
+             MemoryManager memoryManager,
+             boolean addBlockPadding,
+             OrcFile.Version version) throws IOException {
     this.fs = fs;
     this.path = path;
     this.conf = conf;
     this.stripeSize = stripeSize;
+    this.version = version;
+    this.addBlockPadding = addBlockPadding;
+    // pick a large block size to minimize stripe overhang or underhang at
+    // block boundaries
+    this.blockSize = Math.min(MAX_BLOCK_SIZE, 2 * stripeSize);
     this.compress = compress;
     this.bufferSize = bufferSize;
     this.rowIndexStride = rowIndexStride;
@@ -249,6 +261,19 @@ class WriterImpl implements Writer, Memo
     }
 
     /**
+     * Get the number of bytes that will be written to the output. Assumes
+     * the stream has already been flushed.
+     * @return the number of bytes
+     */
+    public long getOutputSize() {
+      long result = 0;
+      for(ByteBuffer buffer: output) {
+        result += buffer.remaining();
+      }
+      return result;
+    }
+
+    /**
      * Write the saved compressed buffers to the OutputStream.
      * @param out the stream to write to
      * @throws IOException
@@ -359,6 +384,13 @@ class WriterImpl implements Writer, Memo
     public Configuration getConfiguration() {
       return conf;
     }
+
+    /**
+     * Get the version of the file to write.
+     */
+    public OrcFile.Version getVersion() {
+      return version;
+    }
   }
 
   /**
@@ -442,20 +474,7 @@ class WriterImpl implements Writer, Memo
     }
 
     boolean isNewWriteFormat(StreamFactory writer) {
-      String writeFormat = writer.getConfiguration().get(
-          HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname);
-      if (writeFormat == null) {
-        LOG.warn("ORC write format not defined. Using 0.12 ORC write format.");
-        return true;
-      }
-      if (writeFormat
-          .equals(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.defaultVal)) {
-        LOG.info("Using 0.11 ORC write format.");
-        return false;
-      }
-
-      LOG.info("Using 0.12 ORC write format.");
-      return true;
+      return writer.getVersion() != OrcFile.Version.V_0_11;
     }
 
     /**
@@ -874,9 +893,10 @@ class WriterImpl implements Writer, Memo
       // Set the flag indicating whether or not to use dictionary encoding
       // based on whether or not the fraction of distinct keys over number of
       // non-null rows is less than the configured threshold
-      useDictionaryEncoding = rows.size() > 0 &&
-        (float)(dictionary.size()) / rows.size() <=
-          dictionaryKeySizeThreshold;
+      useDictionaryEncoding =
+        (!isDirectV2) || (rows.size() > 0 &&
+                          (float)(dictionary.size()) / rows.size() <=
+                            dictionaryKeySizeThreshold);
       final int[] dumpOrder = new int[dictionary.size()];
 
       if (useDictionaryEncoding) {
@@ -1600,12 +1620,11 @@ class WriterImpl implements Writer, Memo
   private void ensureWriter() throws IOException {
     if (rawWriter == null) {
       rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
-        fs.getDefaultReplication(),
-          Math.min(stripeSize * 2L, Integer.MAX_VALUE));
+                            fs.getDefaultReplication(), blockSize);
       rawWriter.writeBytes(OrcFile.MAGIC);
       headerLength = rawWriter.getPos();
       writer = new OutStream("metadata", bufferSize, codec,
-        new DirectStream(rawWriter));
+                             new DirectStream(rawWriter));
       protobufWriter = CodedOutputStream.newInstance(writer);
     }
   }
@@ -1621,43 +1640,70 @@ class WriterImpl implements Writer, Memo
       createRowIndexEntry();
     }
     if (rowsInStripe != 0) {
+
+      // finalize the data for the stripe
       int requiredIndexEntries = rowIndexStride == 0 ? 0 :
           (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
       OrcProto.StripeFooter.Builder builder =
           OrcProto.StripeFooter.newBuilder();
       treeWriter.writeStripe(builder, requiredIndexEntries);
-      long start = rawWriter.getPos();
-      long section = start;
-      long indexEnd = start;
+      long indexSize = 0;
+      long dataSize = 0;
       for(Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
         BufferedStream stream = pair.getValue();
         if (!stream.isSuppressed()) {
           stream.flush();
-          stream.spillTo(rawWriter);
-          long end = rawWriter.getPos();
           StreamName name = pair.getKey();
+          long streamSize = pair.getValue().getOutputSize();
           builder.addStreams(OrcProto.Stream.newBuilder()
-              .setColumn(name.getColumn())
-              .setKind(name.getKind())
-              .setLength(end-section));
-          section = end;
+                             .setColumn(name.getColumn())
+                             .setKind(name.getKind())
+                             .setLength(streamSize));
           if (StreamName.Area.INDEX == name.getArea()) {
-            indexEnd = end;
+            indexSize += streamSize;
+          } else {
+            dataSize += streamSize;
           }
         }
+      }
+      OrcProto.StripeFooter footer = builder.build();
+
+      // Do we need to pad the file so the stripe doesn't straddle a block
+      // boundary?
+      long start = rawWriter.getPos();
+      long stripeSize = indexSize + dataSize + footer.getSerializedSize();
+      if (addBlockPadding &&
+          stripeSize < blockSize &&
+          (start % blockSize) + stripeSize > blockSize) {
+        long padding = blockSize - (start % blockSize);
+        byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, padding)];
+        start += padding;
+        while (padding > 0) {
+          int writeLen = (int) Math.min(padding, pad.length);
+          rawWriter.write(pad, 0, writeLen);
+          padding -= writeLen;
+        }
+      }
+
+      // write out the data streams
+      for(Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
+        BufferedStream stream = pair.getValue();
+        if (!stream.isSuppressed()) {
+          stream.spillTo(rawWriter);
+        }
         stream.clear();
       }
-      builder.build().writeTo(protobufWriter);
+      footer.writeTo(protobufWriter);
       protobufWriter.flush();
       writer.flush();
-      long end = rawWriter.getPos();
+      long footerLength = rawWriter.getPos() - start - dataSize - indexSize;
       OrcProto.StripeInformation dirEntry =
           OrcProto.StripeInformation.newBuilder()
               .setOffset(start)
-              .setIndexLength(indexEnd - start)
-              .setDataLength(section - indexEnd)
               .setNumberOfRows(rowsInStripe)
-              .setFooterLength(end - section).build();
+              .setIndexLength(indexSize)
+              .setDataLength(dataSize)
+              .setFooterLength(footerLength).build();
       stripes.add(dirEntry);
       rowCount += rowsInStripe;
       rowsInStripe = 0;
@@ -1704,7 +1750,8 @@ class WriterImpl implements Writer, Memo
         .setName(entry.getKey()).setValue(entry.getValue()));
     }
     long startPosn = rawWriter.getPos();
-    builder.build().writeTo(protobufWriter);
+    OrcProto.Footer footer = builder.build();
+    footer.writeTo(protobufWriter);
     protobufWriter.flush();
     writer.flush();
     return (int) (rawWriter.getPos() - startPosn);
@@ -1716,8 +1763,8 @@ class WriterImpl implements Writer, Memo
         .setCompression(writeCompressionKind(compress))
         .setFooterLength(footerLength)
         .setMagic(OrcFile.MAGIC)
-        .addVersion(OrcFile.MAJOR_VERSION)
-        .addVersion(OrcFile.MINOR_VERSION);
+        .addVersion(version.getMajor())
+        .addVersion(version.getMinor());
     if (compress != CompressionKind.NONE) {
       builder.setCompressionBlockSize(bufferSize);
     }

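The padding decision is plain arithmetic on the writer's current position: a
stripe is padded only when it is smaller than a block yet would cross the next
block boundary. A worked sketch with deliberately small, hypothetical sizes:

    // hypothetical sizes, not the ORC defaults
    long blockSize = 512, stripeBytes = 200, start = 400;
    boolean straddles = stripeBytes < blockSize
        && (start % blockSize) + stripeBytes > blockSize;  // 400 + 200 > 512
    long padding = blockSize - (start % blockSize);        // 112 zero bytes
    // the stripe then begins at offset 512, aligned to the block start;
    // with the real defaults (256MB stripes) blockSize = min(1.5GB, 512MB).
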
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java Thu Aug 29 21:23:02 2013
@@ -108,8 +108,12 @@ public class TestNewIntegerEncoding {
           Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     writer.addRow(new Row(111, 1111L));
     writer.addRow(new Row(111, 1111L));
     writer.addRow(new Row(111, 1111L));
@@ -138,9 +142,12 @@ public class TestNewIntegerEncoding {
         9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
         1, 1, 1, 1 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
-    conf.set("hive.exec.orc.write.format", "0.11");
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .compress(CompressionKind.NONE)
+                                         .version(OrcFile.Version.V_0_11)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -170,8 +177,12 @@ public class TestNewIntegerEncoding {
         1, 1, 1, 1 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -197,8 +208,12 @@ public class TestNewIntegerEncoding {
     long[] inp = new long[] { -500, -400, -350, -325, -310 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -224,8 +239,12 @@ public class TestNewIntegerEncoding {
     long[] inp = new long[] { -500, -600, -650, -675, -710 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -251,8 +270,12 @@ public class TestNewIntegerEncoding {
     long[] inp = new long[] { 500, 400, 350, 325, 310 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -278,8 +301,12 @@ public class TestNewIntegerEncoding {
     long[] inp = new long[] { 500, 600, 650, 675, 710 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -305,8 +332,11 @@ public class TestNewIntegerEncoding {
     List<Long> input = Lists.newArrayList();
     input.add((long) Integer.MIN_VALUE);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -332,8 +362,12 @@ public class TestNewIntegerEncoding {
     List<Long> input = Lists.newArrayList();
     input.add((long) Integer.MAX_VALUE);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -359,8 +393,12 @@ public class TestNewIntegerEncoding {
     List<Long> input = Lists.newArrayList();
     input.add(Long.MIN_VALUE);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -386,8 +424,12 @@ public class TestNewIntegerEncoding {
     List<Long> input = Lists.newArrayList();
     input.add(Long.MAX_VALUE);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -416,8 +458,12 @@ public class TestNewIntegerEncoding {
       input.add((long) rand.nextInt());
     }
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -446,8 +492,12 @@ public class TestNewIntegerEncoding {
       input.add(rand.nextLong());
     }
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -484,8 +534,12 @@ public class TestNewIntegerEncoding {
         2, 16 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -522,8 +576,12 @@ public class TestNewIntegerEncoding {
         2, 16 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -560,8 +618,12 @@ public class TestNewIntegerEncoding {
         2, 16 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -589,8 +651,12 @@ public class TestNewIntegerEncoding {
         6, 8, 7, 9, 9, 11, 33, 11, 3, 7, 4, 6, 10, 14, 12, 5, 14, 7, 6 };
     List<Long> input = Lists.newArrayList(Longs.asList(inp));
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -620,8 +686,12 @@ public class TestNewIntegerEncoding {
     }
     input.set(0, 20000L);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -651,8 +721,12 @@ public class TestNewIntegerEncoding {
     }
     input.set(1, 20000L);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -682,8 +756,11 @@ public class TestNewIntegerEncoding {
     }
     input.set(255, 20000L);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -713,8 +790,11 @@ public class TestNewIntegerEncoding {
     }
     input.set(256, 20000L);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -744,8 +824,11 @@ public class TestNewIntegerEncoding {
     }
     input.set(510, 20000L);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -775,8 +858,11 @@ public class TestNewIntegerEncoding {
     }
     input.set(511, 20000L);
 
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
     for(Long l : input) {
       writer.addRow(l);
     }
@@ -804,9 +890,13 @@ public class TestNewIntegerEncoding {
     for(int i = 0; i < 100000; i++) {
       input.add((long) rand.nextInt());
     }
-    conf.set("hive.exec.orc.write.format", "0.11");
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .compress(CompressionKind.NONE)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000)
+                                         .version(OrcFile.Version.V_0_11));
     for(Long l : input) {
       writer.addRow(l);
     }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Thu Aug 29 21:23:02 2013
@@ -193,8 +193,11 @@ public class TestOrcFile {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
     writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
         Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
         new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
@@ -425,8 +428,13 @@ public class TestOrcFile {
           (InnerStruct.class,
               ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        1000, CompressionKind.NONE, 100, 1000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100)
+                                         .rowIndexStride(1000));
     Random r1 = new Random(1);
     Random r2 = new Random(2);
     int x;
@@ -508,8 +516,12 @@ public class TestOrcFile {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        1000, CompressionKind.NONE, 100, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100));
     writer.close();
     Reader reader = OrcFile.createReader(fs, testFilePath);
     assertEquals(false, reader.rows(null).hasNext());
@@ -528,9 +540,14 @@ public class TestOrcFile {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        1000, CompressionKind.NONE, 100, 10000);
-    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100));
+    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127,
+                                              -128));
     writer.addUserMetadata("clobber", byteBuf(1,2,3));
     writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
     ByteBuffer bigBuf = ByteBuffer.allocate(40000);
@@ -594,8 +611,13 @@ public class TestOrcFile {
       inspector = OrcStruct.createObjectInspector(0, types);
     }
     HiveDecimal maxValue = new HiveDecimal("100000000000000000000");
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        1000, CompressionKind.NONE, 100, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100)
+                                         .blockPadding(false));
     OrcStruct row = new OrcStruct(3);
     OrcUnion union = new OrcUnion();
     row.setFieldValue(1, union);
@@ -773,8 +795,12 @@ public class TestOrcFile {
           (InnerStruct.class,
               ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        1000, CompressionKind.SNAPPY, 100, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.SNAPPY)
+                                         .bufferSize(100));
     Random rand = new Random(12);
     for(int i=0; i < 10000; ++i) {
       writer.addRow(new InnerStruct(rand.nextInt(),
@@ -808,8 +834,13 @@ public class TestOrcFile {
           (InnerStruct.class,
               ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        5000, CompressionKind.SNAPPY, 1000, 0);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(5000)
+                                         .compress(CompressionKind.SNAPPY)
+                                         .bufferSize(1000)
+                                         .rowIndexStride(0));
     Random rand = new Random(24);
     for(int i=0; i < 10000; ++i) {
       InnerStruct row = new InnerStruct(rand.nextInt(),
@@ -849,8 +880,12 @@ public class TestOrcFile {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        200000, CompressionKind.ZLIB, 65536, 1000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(200000)
+                                         .bufferSize(65536)
+                                         .rowIndexStride(1000));
     Random rand = new Random(42);
     final int COUNT=32768;
     long[] intValues= new long[COUNT];
@@ -1032,8 +1067,14 @@ public class TestOrcFile {
               ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
     MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
-    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
-        50000, CompressionKind.NONE, 100, 0, memory);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .compress(CompressionKind.NONE)
+                                         .stripeSize(50000)
+                                         .bufferSize(100)
+                                         .rowIndexStride(0)
+                                         .memory(memory));
     assertEquals(testFilePath, memory.path);
     for(int i=0; i < 2500; ++i) {
       writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcNullOptimization.java Thu Aug 29 21:23:02 2013
@@ -91,15 +91,21 @@ public class TestOrcNullOptimization {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (MyStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     Random rand = new Random(100);
-    writer.addRow(new MyStruct(null, null, true, Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(null, null, true,
+                               Lists.newArrayList(new InnerStruct(100))));
     for (int i = 2; i < 20000; i++) {
       writer.addRow(new MyStruct(rand.nextInt(1), "a", true, Lists
           .newArrayList(new InnerStruct(100))));
     }
-    writer.addRow(new MyStruct(null, null, true, Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(null, null, true,
+                               Lists.newArrayList(new InnerStruct(100))));
     writer.close();
 
     Reader reader = OrcFile.createReader(fs, testFilePath);
@@ -117,7 +123,8 @@ public class TestOrcNullOptimization {
 
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
-    assertEquals(19998, ((StringColumnStatistics) stats[2]).getNumberOfValues());
+    assertEquals(19998,
+                 ((StringColumnStatistics) stats[2]).getNumberOfValues());
     assertEquals("count: 19998 min: a max: a",
         stats[2].toString());
 
@@ -142,8 +149,10 @@ public class TestOrcNullOptimization {
     List<Boolean> got = Lists.newArrayList();
     // check if the stripe footer contains a PRESENT stream
     for (StripeInformation sinfo : reader.getStripes()) {
-      OrcProto.StripeFooter sf = ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString()) != -1);
+      OrcProto.StripeFooter sf =
+        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
+      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
+              != -1);
     }
     assertEquals(expected, got);
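Some context on this check: ORC writes a PRESENT (null-bitmap) stream for a
column only in stripes where that column actually contains nulls, which is
the optimization these tests exercise. StripeFooter exposes no direct query
for it here, so the tests grep the footer's debug string. An equivalent
sketch using String.contains (readStripeFooter is package-private on
RecordReaderImpl, hence the tests living in the same package):

    for (StripeInformation sinfo : reader.getStripes()) {
      OrcProto.StripeFooter sf =
          ((RecordReaderImpl) rows).readStripeFooter(sinfo);
      // true iff this stripe recorded a PRESENT stream
      got.add(sf.toString().contains(
          OrcProto.Stream.Kind.PRESENT.toString()));
    }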
 
@@ -154,7 +163,8 @@ public class TestOrcNullOptimization {
     assertNull(row.getFieldValue(1));
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                 getFieldValue(0));
 
     rows.seekToRow(19998);
     // last-1 row
@@ -164,7 +174,8 @@ public class TestOrcNullOptimization {
     assertEquals(new IntWritable(0), row.getFieldValue(0));
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                 getFieldValue(0));
 
     // last row
     row = (OrcStruct) rows.next(row);
@@ -173,7 +184,8 @@ public class TestOrcNullOptimization {
     assertNull(row.getFieldValue(1));
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                 getFieldValue(0));
 
     rows.close();
   }
@@ -185,14 +197,19 @@ public class TestOrcNullOptimization {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (MyStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.NONE, 10000, 10000);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
     Random rand = new Random(100);
     for (int i = 1; i < 20000; i++) {
       writer.addRow(new MyStruct(rand.nextInt(1), "a", true, Lists
           .newArrayList(new InnerStruct(100))));
     }
-    writer.addRow(new MyStruct(0, "b", true, Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(0, "b", true,
+                               Lists.newArrayList(new InnerStruct(100))));
     writer.close();
 
     Reader reader = OrcFile.createReader(fs, testFilePath);
@@ -210,7 +227,8 @@ public class TestOrcNullOptimization {
 
     assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
-    assertEquals(20000, ((StringColumnStatistics) stats[2]).getNumberOfValues());
+    assertEquals(20000,
+                 ((StringColumnStatistics) stats[2]).getNumberOfValues());
     assertEquals("count: 20000 min: a max: b",
         stats[2].toString());
 
@@ -233,8 +251,10 @@ public class TestOrcNullOptimization {
     List<Boolean> got = Lists.newArrayList();
     // check if the stripe footer contains a PRESENT stream
     for (StripeInformation sinfo : reader.getStripes()) {
-      OrcProto.StripeFooter sf = ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString()) != -1);
+      OrcProto.StripeFooter sf =
+        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
+      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
+              != -1);
     }
     assertEquals(expected, got);
 
@@ -247,7 +267,8 @@ public class TestOrcNullOptimization {
     assertEquals("a", row.getFieldValue(1).toString());
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+                 ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                   getFieldValue(0));
 
     // last row
     row = (OrcStruct) rows.next(row);
@@ -257,8 +278,8 @@ public class TestOrcNullOptimization {
     assertEquals("b", row.getFieldValue(1).toString());
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
-
+                 ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                   getFieldValue(0));
     rows.close();
   }
 
@@ -269,16 +290,27 @@ public class TestOrcNullOptimization {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
           (MyStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
-        100000, CompressionKind.ZLIB, 10000, 10000);
-    writer.addRow(new MyStruct(3, "a", true, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(null, "b", true, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(3, null, false, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(3, "d", true, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(2, "e", true, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(2, "f", true, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(2, "g", true, Lists.newArrayList(new InnerStruct(100))));
-    writer.addRow(new MyStruct(2, "h", true, Lists.newArrayList(new InnerStruct(100))));
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
+    writer.addRow(new MyStruct(3, "a", true,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(null, "b", true,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(3, null, false,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(3, "d", true,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(2, "e", true,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(2, "f", true,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(2, "g", true,
+                               Lists.newArrayList(new InnerStruct(100))));
+    writer.addRow(new MyStruct(2, "h", true,
+                               Lists.newArrayList(new InnerStruct(100))));
     writer.close();
 
     Reader reader = OrcFile.createReader(fs, testFilePath);
@@ -319,8 +351,10 @@ public class TestOrcNullOptimization {
     List<Boolean> got = Lists.newArrayList();
     // check if the stripe footer contains a PRESENT stream
     for (StripeInformation sinfo : reader.getStripes()) {
-      OrcProto.StripeFooter sf = ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString()) != -1);
+      OrcProto.StripeFooter sf =
+        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
+      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
+              != -1);
     }
     assertEquals(expected, got);
 
@@ -331,7 +365,8 @@ public class TestOrcNullOptimization {
     assertEquals("a", row.getFieldValue(1).toString());
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                 getFieldValue(0));
 
     // row 2
     row = (OrcStruct) rows.next(row);
@@ -340,7 +375,8 @@ public class TestOrcNullOptimization {
     assertEquals("b", row.getFieldValue(1).toString());
     assertEquals(new BooleanWritable(true), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                 getFieldValue(0));
 
     // row 3
     row = (OrcStruct) rows.next(row);
@@ -349,7 +385,8 @@ public class TestOrcNullOptimization {
     assertEquals(new IntWritable(3), row.getFieldValue(0));
     assertEquals(new BooleanWritable(false), row.getFieldValue(2));
     assertEquals(new IntWritable(100),
-        ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).getFieldValue(0));
+                 ((OrcStruct) ((ArrayList<?>) row.getFieldValue(3)).get(0)).
+                 getFieldValue(0));
     rows.close();
   }
 }
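One detail behind the statistics assertions in these tests:
getNumberOfValues() counts non-null entries only, so writing two null
strings into a 20000-row file leaves column 2 at 19998 in the first test,
while the all-non-null second test reports the full 20000. A minimal
read-back sketch for the first test (names as in the tests):

    Reader reader = OrcFile.createReader(fs, testFilePath);
    ColumnStatistics[] stats = reader.getStatistics();
    StringColumnStatistics col2 = (StringColumnStatistics) stats[2];
    // 20000 rows written, 2 of them null in column 2
    assertEquals(19998, col2.getNumberOfValues());
    assertEquals("a", col2.getMinimum());
    assertEquals("a", col2.getMaximum());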

Modified: hive/trunk/ql/src/test/resources/orc-file-dump.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump.out?rev=1518830&r1=1518829&r2=1518830&view=diff
==============================================================================
--- hive/trunk/ql/src/test/resources/orc-file-dump.out (original)
+++ hive/trunk/ql/src/test/resources/orc-file-dump.out Thu Aug 29 21:23:02 2013
@@ -53,31 +53,31 @@ Stripes:
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
     Encoding column 3: DICTIONARY_V2
-  Stripe: offset: 191873 data: 63796 rows: 5000 tail: 74 index: 119
-    Stream: column 0 section ROW_INDEX start: 191873 length 10
-    Stream: column 1 section ROW_INDEX start: 191883 length 35
-    Stream: column 2 section ROW_INDEX start: 191918 length 39
-    Stream: column 3 section ROW_INDEX start: 191957 length 35
-    Stream: column 1 section DATA start: 191992 length 20029
-    Stream: column 2 section DATA start: 212021 length 40035
-    Stream: column 3 section DATA start: 252056 length 3574
-    Stream: column 3 section LENGTH start: 255630 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 255655 length 133
+  Stripe: offset: 200000 data: 63796 rows: 5000 tail: 74 index: 119
+    Stream: column 0 section ROW_INDEX start: 200000 length 10
+    Stream: column 1 section ROW_INDEX start: 200010 length 35
+    Stream: column 2 section ROW_INDEX start: 200045 length 39
+    Stream: column 3 section ROW_INDEX start: 200084 length 35
+    Stream: column 1 section DATA start: 200119 length 20029
+    Stream: column 2 section DATA start: 220148 length 40035
+    Stream: column 3 section DATA start: 260183 length 3574
+    Stream: column 3 section LENGTH start: 263757 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 263782 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
     Encoding column 3: DICTIONARY_V2
-  Stripe: offset: 255862 data: 12940 rows: 1000 tail: 71 index: 120
-    Stream: column 0 section ROW_INDEX start: 255862 length 10
-    Stream: column 1 section ROW_INDEX start: 255872 length 36
-    Stream: column 2 section ROW_INDEX start: 255908 length 39
-    Stream: column 3 section ROW_INDEX start: 255947 length 35
-    Stream: column 1 section DATA start: 255982 length 4007
-    Stream: column 2 section DATA start: 259989 length 8007
-    Stream: column 3 section DATA start: 267996 length 768
-    Stream: column 3 section LENGTH start: 268764 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 268789 length 133
+  Stripe: offset: 263989 data: 12940 rows: 1000 tail: 71 index: 120
+    Stream: column 0 section ROW_INDEX start: 263989 length 10
+    Stream: column 1 section ROW_INDEX start: 263999 length 36
+    Stream: column 2 section ROW_INDEX start: 264035 length 39
+    Stream: column 3 section ROW_INDEX start: 264074 length 35
+    Stream: column 1 section DATA start: 264109 length 4007
+    Stream: column 2 section DATA start: 268116 length 8007
+    Stream: column 3 section DATA start: 276123 length 768
+    Stream: column 3 section LENGTH start: 276891 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 276916 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
-    Encoding column 3: DICTIONARY_V2
\ No newline at end of file
+    Encoding column 3: DICTIONARY_V2
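The shifted offsets in the expected dump stay internally consistent with
the stripe layout: each stripe begins where the previous one ends
(offset + index + data + tail). For the stripes shown:

    old: 191873 + 119 + 63796 + 74 = 255862
    new: 200000 + 119 + 63796 + 74 = 263989

The only real change is the starting offset itself: with padding enabled
the writer starts this stripe at 200000 instead of 191873, presumably the
block boundary configured by the updated dump test, so the stripe no
longer straddles a block edge.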


