hive-commits mailing list archives

From omal...@apache.org
Subject [23/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.
Date Wed, 19 Jul 2017 16:58:46 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/IntegerColumnStatistics.java b/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
deleted file mode 100644
index 1a162ff..0000000
--- a/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for all of the integer columns, such as byte, short, int, and
- * long.
- */
-public interface IntegerColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  long getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  long getMaximum();
-
-  /**
-   * Is the sum defined? If the sum overflowed the counter this will be false.
-   * @return is the sum available
-   */
-  boolean isSumDefined();
-
-  /**
-   * Get the sum of the column. Only valid if isSumDefined returns true.
-   * @return the sum of the column
-   */
-  long getSum();
-}
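For context, a minimal sketch of how this interface is typically consumed, assuming a Reader obtained from OrcFile; the column index is illustrative and imports from org.apache.orc are omitted:

    // Inspect integer column statistics from the file footer.
    ColumnStatistics[] stats = reader.getStatistics();
    ColumnStatistics colStat = stats[1];          // assume column 1 is an integer column
    if (colStat instanceof IntegerColumnStatistics) {
      IntegerColumnStatistics intStats = (IntegerColumnStatistics) colStat;
      if (intStats.getNumberOfValues() > 0) {     // min/max only defined for non-empty columns
        System.out.println("min = " + intStats.getMinimum());
        System.out.println("max = " + intStats.getMaximum());
      }
      if (intStats.isSumDefined()) {              // sum is invalid if it overflowed while writing
        System.out.println("sum = " + intStats.getSum());
      }
    }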

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcConf.java b/orc/src/java/org/apache/orc/OrcConf.java
deleted file mode 100644
index 357318d..0000000
--- a/orc/src/java/org/apache/orc/OrcConf.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.conf.Configuration;
-
-import java.util.Properties;
-
-/**
- * Define the configuration properties that Orc understands.
- */
-public enum OrcConf {
-  STRIPE_SIZE("orc.stripe.size", "hive.exec.orc.default.stripe.size",
-      64L * 1024 * 1024,
-      "Define the default ORC stripe size, in bytes."),
-  BLOCK_SIZE("orc.block.size", "hive.exec.orc.default.block.size",
-      256L * 1024 * 1024,
-      "Define the default file system block size for ORC files."),
-  ENABLE_INDEXES("orc.create.index", "orc.create.index", true,
-      "Should the ORC writer create indexes as part of the file."),
-  ROW_INDEX_STRIDE("orc.row.index.stride",
-      "hive.exec.orc.default.row.index.stride", 10000,
-      "Define the default ORC index stride in number of rows. (Stride is the\n"+
-          " number of rows n index entry represents.)"),
-  BUFFER_SIZE("orc.compress.size", "hive.exec.orc.default.buffer.size",
-      256 * 1024, "Define the default ORC buffer size, in bytes."),
-  BASE_DELTA_RATIO("orc.base.delta.ratio", "hive.exec.orc.base.delta.ratio", 8,
-      "The ratio of base writer and delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
-  BLOCK_PADDING("orc.block.padding", "hive.exec.orc.default.block.padding",
-      true,
-      "Define whether stripes should be padded to the HDFS block boundaries."),
-  COMPRESS("orc.compress", "hive.exec.orc.default.compress", "ZLIB",
-      "Define the default compression codec for ORC file"),
-  WRITE_FORMAT("orc.write.format", "hive.exec.orc.write.format", "0.12",
-      "Define the version of the file to write. Possible values are 0.11 and\n"+
-          " 0.12. If this parameter is not defined, ORC will use the run\n" +
-          " length encoding (RLE) introduced in Hive 0.12."),
-  ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy",
-      "SPEED",
-      "Define the encoding strategy to use while writing data. Changing this\n"+
-          "will only affect the light weight encoding for integers. This\n" +
-          "flag will not change the compression level of higher level\n" +
-          "compression codec (like ZLIB)."),
-  COMPRESSION_STRATEGY("orc.compression.strategy",
-      "hive.exec.orc.compression.strategy", "SPEED",
-      "Define the compression strategy to use while writing data.\n" +
-          "This changes the compression level of higher level compression\n" +
-          "codec (like ZLIB)."),
-  BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
-      "hive.exec.orc.block.padding.tolerance", 0.05,
-      "Define the tolerance for block padding as a decimal fraction of\n" +
-          "stripe size (for example, the default value 0.05 is 5% of the\n" +
-          "stripe size). For the defaults of 64Mb ORC stripe and 256Mb HDFS\n" +
-          "blocks, the default block padding tolerance of 5% will\n" +
-          "reserve a maximum of 3.2Mb for padding within the 256Mb block.\n" +
-          "In that case, if the available size within the block is more than\n"+
-          "3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
-          "space. This will make sure that no stripe written will block\n" +
-          " boundaries and cause remote reads within a node local task."),
-  BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
-      "Define the default false positive probability for bloom filters."),
-  USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
-      "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
-  SKIP_CORRUPT_DATA("orc.skip.corrupt.data", "hive.exec.orc.skip.corrupt.data",
-      false,
-      "If ORC reader encounters corrupt data, this value will be used to\n" +
-          "determine whether to skip the corrupt data or throw exception.\n" +
-          "The default behavior is to throw exception."),
-  MEMORY_POOL("orc.memory.pool", "hive.exec.orc.memory.pool", 0.5,
-      "Maximum fraction of heap that can be used by ORC file writers"),
-  DICTIONARY_KEY_SIZE_THRESHOLD("orc.dictionary.key.threshold",
-      "hive.exec.orc.dictionary.key.size.threshold",
-      0.8,
-      "If the number of distinct keys in a dictionary is greater than this\n" +
-          "fraction of the total number of non-null rows, turn off \n" +
-          "dictionary encoding.  Use 1 to always use dictionary encoding."),
-  ROW_INDEX_STRIDE_DICTIONARY_CHECK("orc.dictionary.early.check",
-      "hive.orc.row.index.stride.dictionary.check",
-      true,
-      "If enabled dictionary check will happen after first row index stride\n" +
-          "(default 10000 rows) else dictionary check will happen before\n" +
-          "writing first stripe. In both cases, the decision to use\n" +
-          "dictionary or not will be retained thereafter."),
-  BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns", "orc.bloom.filter.columns",
-      "", "List of columns to create bloom filters for when writing.")
-  ;
-
-  private final String attribute;
-  private final String hiveConfName;
-  private final Object defaultValue;
-  private final String description;
-
-  OrcConf(String attribute,
-          String hiveConfName,
-          Object defaultValue,
-          String description) {
-    this.attribute = attribute;
-    this.hiveConfName = hiveConfName;
-    this.defaultValue = defaultValue;
-    this.description = description;
-  }
-
-  public String getAttribute() {
-    return attribute;
-  }
-
-  public String getHiveConfName() {
-    return hiveConfName;
-  }
-
-  public Object getDefaultValue() {
-    return defaultValue;
-  }
-
-  public String getDescription() {
-    return description;
-  }
-
-  private String lookupValue(Properties tbl, Configuration conf) {
-    String result = null;
-    if (tbl != null) {
-      result = tbl.getProperty(attribute);
-    }
-    if (result == null && conf != null) {
-      result = conf.get(attribute);
-      if (result == null) {
-        result = conf.get(hiveConfName);
-      }
-    }
-    return result;
-  }
-
-  public long getLong(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    if (value != null) {
-      return Long.parseLong(value);
-    }
-    return ((Number) defaultValue).longValue();
-  }
-
-  public long getLong(Configuration conf) {
-    return getLong(null, conf);
-  }
-
-  public String getString(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    return value == null ? (String) defaultValue : value;
-  }
-
-  public String getString(Configuration conf) {
-    return getString(null, conf);
-  }
-
-  public boolean getBoolean(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    if (value != null) {
-      return Boolean.parseBoolean(value);
-    }
-    return (Boolean) defaultValue;
-  }
-
-  public boolean getBoolean(Configuration conf) {
-    return getBoolean(null, conf);
-  }
-
-  public double getDouble(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    if (value != null) {
-      return Double.parseDouble(value);
-    }
-    return ((Number) defaultValue).doubleValue();
-  }
-
-  public double getDouble(Configuration conf) {
-    return getDouble(null, conf);
-  }
-}
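For context, the lookup order implemented by lookupValue above is: table properties first, then the orc.* attribute in the Configuration, then the legacy hive.* name, and finally the compiled-in default. A minimal sketch of that precedence (property values are illustrative; imports from org.apache.orc and org.apache.hadoop.conf are omitted):

    Properties tableProps = new Properties();
    Configuration conf = new Configuration();

    conf.setLong("hive.exec.orc.default.stripe.size", 32L * 1024 * 1024);
    // Resolved via the legacy hive.* name: 32MB.
    long stripeSize = OrcConf.STRIPE_SIZE.getLong(tableProps, conf);

    conf.setLong("orc.stripe.size", 16L * 1024 * 1024);
    // The orc.* attribute takes precedence over the hive.* name: 16MB.
    stripeSize = OrcConf.STRIPE_SIZE.getLong(tableProps, conf);

    tableProps.setProperty("orc.stripe.size", String.valueOf(8L * 1024 * 1024));
    // Table properties win over both Configuration keys: 8MB.
    stripeSize = OrcConf.STRIPE_SIZE.getLong(tableProps, conf);

    // With nothing set anywhere, the enum's built-in default (64MB) is returned.
    long defaultSize = OrcConf.STRIPE_SIZE.getLong(new Properties(), new Configuration());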

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcFile.java b/orc/src/java/org/apache/orc/OrcFile.java
deleted file mode 100644
index 06fb666..0000000
--- a/orc/src/java/org/apache/orc/OrcFile.java
+++ /dev/null
@@ -1,574 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.impl.MemoryManager;
-import org.apache.orc.impl.OrcTail;
-import org.apache.orc.impl.ReaderImpl;
-import org.apache.orc.impl.WriterImpl;
-
-/**
- * Contains factory methods to read or write ORC files.
- */
-public class OrcFile {
-  public static final String MAGIC = "ORC";
-
-  /**
-   * Create a version number for the ORC file format, so that we can add
-   * non-forward compatible changes in the future. To make it easier for users
-   * to understand the version numbers, we use the Hive release number that
-   * first wrote that version of ORC files.
-   *
-   * Thus, if you add new encodings or other non-forward compatible changes
-   * to ORC files, which prevent the old reader from reading the new format,
-   * you should change this variable to reflect the next Hive release number.
-   * Non-forward compatible changes should never be added in patch releases.
-   *
-   * Do not make any changes that break backwards compatibility, which would
-   * prevent the new reader from reading ORC files generated by any released
-   * version of Hive.
-   */
-  public enum Version {
-    V_0_11("0.11", 0, 11),
-    V_0_12("0.12", 0, 12);
-
-    public static final Version CURRENT = V_0_12;
-
-    private final String name;
-    private final int major;
-    private final int minor;
-
-    Version(String name, int major, int minor) {
-      this.name = name;
-      this.major = major;
-      this.minor = minor;
-    }
-
-    public static Version byName(String name) {
-      for(Version version: values()) {
-        if (version.name.equals(name)) {
-          return version;
-        }
-      }
-      throw new IllegalArgumentException("Unknown ORC version " + name);
-    }
-
-    /**
-     * Get the human readable name for the version.
-     */
-    public String getName() {
-      return name;
-    }
-
-    /**
-     * Get the major version number.
-     */
-    public int getMajor() {
-      return major;
-    }
-
-    /**
-     * Get the minor version number.
-     */
-    public int getMinor() {
-      return minor;
-    }
-  }
-
-  /**
-   * Records the version of the writer in terms of which bugs have been fixed.
-   * For bugs in the writer where the old readers can still read the new data
-   * correctly, bump this version instead of the Version.
-   */
-  public enum WriterVersion {
-    ORIGINAL(0),
-    HIVE_8732(1), // corrupted stripe/file maximum column statistics
-    HIVE_4243(2), // use real column names from Hive tables
-    HIVE_12055(3), // vectorized writer
-    HIVE_13083(4), // decimal writer updating present stream wrongly
-
-    // Don't use any magic numbers here except for the below:
-    FUTURE(Integer.MAX_VALUE); // a version from a future writer
-
-    private final int id;
-
-    public int getId() {
-      return id;
-    }
-
-    WriterVersion(int id) {
-      this.id = id;
-    }
-
-    private static final WriterVersion[] values;
-    static {
-      // Assumes few non-negative values close to zero.
-      int max = Integer.MIN_VALUE;
-      for (WriterVersion v : WriterVersion.values()) {
-        if (v.id < 0) throw new AssertionError();
-        if (v.id > max && FUTURE.id != v.id) {
-          max = v.id;
-        }
-      }
-      values = new WriterVersion[max + 1];
-      for (WriterVersion v : WriterVersion.values()) {
-        if (v.id < values.length) {
-          values[v.id] = v;
-        }
-      }
-    }
-
-    /**
-     * Convert the integer from OrcProto.PostScript.writerVersion
-     * to the enumeration with unknown versions being mapped to FUTURE.
-     * @param val the serialized writer version
-     * @return the corresponding enumeration value
-     */
-    public static WriterVersion from(int val) {
-      if (val >= values.length) {
-        return FUTURE;
-      }
-      return values[val];
-    }
-  }
-  public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_13083;
-
-  public enum EncodingStrategy {
-    SPEED, COMPRESSION
-  }
-
-  public enum CompressionStrategy {
-    SPEED, COMPRESSION
-  }
-
-  // unused
-  protected OrcFile() {}
-
-  public static class ReaderOptions {
-    private final Configuration conf;
-    private FileSystem filesystem;
-    private long maxLength = Long.MAX_VALUE;
-    private OrcTail orcTail;
-    // TODO: We can generalize the FileMetadata interface. Make OrcTail implement FileMetadata
-    // and remove this class altogether. Both footer caching and LLAP caching just need OrcTail.
-    // For now, keeping this around to avoid complex surgery.
-    private FileMetadata fileMetadata;
-
-    public ReaderOptions(Configuration conf) {
-      this.conf = conf;
-    }
-
-    public ReaderOptions filesystem(FileSystem fs) {
-      this.filesystem = fs;
-      return this;
-    }
-
-    public ReaderOptions maxLength(long val) {
-      maxLength = val;
-      return this;
-    }
-
-    public ReaderOptions orcTail(OrcTail tail) {
-      this.orcTail = tail;
-      return this;
-    }
-
-    public Configuration getConfiguration() {
-      return conf;
-    }
-
-    public FileSystem getFilesystem() {
-      return filesystem;
-    }
-
-    public long getMaxLength() {
-      return maxLength;
-    }
-
-    public OrcTail getOrcTail() {
-      return orcTail;
-    }
-
-    public ReaderOptions fileMetadata(final FileMetadata metadata) {
-      fileMetadata = metadata;
-      return this;
-    }
-
-    public FileMetadata getFileMetadata() {
-      return fileMetadata;
-    }
-  }
-
-  public static ReaderOptions readerOptions(Configuration conf) {
-    return new ReaderOptions(conf);
-  }
-
-  public static Reader createReader(Path path,
-                                    ReaderOptions options) throws IOException {
-    return new ReaderImpl(path, options);
-  }
-
-  public interface WriterContext {
-    Writer getWriter();
-  }
-
-  public interface WriterCallback {
-    void preStripeWrite(WriterContext context) throws IOException;
-    void preFooterWrite(WriterContext context) throws IOException;
-  }
-
-  /**
-   * Options for creating ORC file writers.
-   */
-  public static class WriterOptions {
-    private final Configuration configuration;
-    private FileSystem fileSystemValue = null;
-    private TypeDescription schema = null;
-    private long stripeSizeValue;
-    private long blockSizeValue;
-    private int rowIndexStrideValue;
-    private int bufferSizeValue;
-    private boolean enforceBufferSize = false;
-    private boolean blockPaddingValue;
-    private CompressionKind compressValue;
-    private MemoryManager memoryManagerValue;
-    private Version versionValue;
-    private WriterCallback callback;
-    private EncodingStrategy encodingStrategy;
-    private CompressionStrategy compressionStrategy;
-    private double paddingTolerance;
-    private String bloomFilterColumns;
-    private double bloomFilterFpp;
-
-    protected WriterOptions(Properties tableProperties, Configuration conf) {
-      configuration = conf;
-      memoryManagerValue = getStaticMemoryManager(conf);
-      stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
-      blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
-      rowIndexStrideValue =
-          (int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
-      bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties,
-          conf);
-      blockPaddingValue =
-          OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
-      compressValue =
-          CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
-              conf).toUpperCase());
-      String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
-          conf);
-      versionValue = Version.byName(versionName);
-      String enString = OrcConf.ENCODING_STRATEGY.getString(tableProperties,
-          conf);
-      encodingStrategy = EncodingStrategy.valueOf(enString);
-
-      String compString =
-          OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
-      compressionStrategy = CompressionStrategy.valueOf(compString);
-
-      paddingTolerance =
-          OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
-
-      bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties,
-          conf);
-      bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties,
-          conf);
-    }
-
-    /**
-     * Provide the filesystem for the path, if the client has it available.
-     * If it is not provided, it will be found from the path.
-     */
-    public WriterOptions fileSystem(FileSystem value) {
-      fileSystemValue = value;
-      return this;
-    }
-
-    /**
-     * Set the stripe size for the file. The writer stores the contents of the
-     * stripe in memory until this memory limit is reached and the stripe
-     * is flushed to the HDFS file and the next stripe started.
-     */
-    public WriterOptions stripeSize(long value) {
-      stripeSizeValue = value;
-      return this;
-    }
-
-    /**
-     * Set the file system block size for the file. For optimal performance,
-     * set the block size to be a multiple of the stripe size.
-     */
-    public WriterOptions blockSize(long value) {
-      blockSizeValue = value;
-      return this;
-    }
-
-    /**
-     * Set the distance between entries in the row index. The minimum value is
-     * 1000 to prevent the index from overwhelming the data. If the stride is
-     * set to 0, no indexes will be included in the file.
-     */
-    public WriterOptions rowIndexStride(int value) {
-      rowIndexStrideValue = value;
-      return this;
-    }
-
-    /**
-     * The size of the memory buffers used for compressing and storing the
-     * stripe in memory. NOTE: the ORC writer may choose to use a smaller
-     * buffer size based on stripe size and number of columns for efficient
-     * stripe writing and memory utilization. To force the writer to use the
-     * requested buffer size, use enforceBufferSize().
-     */
-    public WriterOptions bufferSize(int value) {
-      bufferSizeValue = value;
-      return this;
-    }
-
-    /**
-     * Force the writer to use the requested buffer size instead of estimating
-     * it based on stripe size and number of columns.
-     * See the bufferSize() method for more info.
-     * Default: false
-     */
-    public WriterOptions enforceBufferSize() {
-      enforceBufferSize = true;
-      return this;
-    }
-
-    /**
-     * Sets whether the HDFS blocks are padded to prevent stripes from
-     * straddling blocks. Padding improves locality and thus the speed of
-     * reading, but costs space.
-     */
-    public WriterOptions blockPadding(boolean value) {
-      blockPaddingValue = value;
-      return this;
-    }
-
-    /**
-     * Sets the encoding strategy that is used to encode the data.
-     */
-    public WriterOptions encodingStrategy(EncodingStrategy strategy) {
-      encodingStrategy = strategy;
-      return this;
-    }
-
-    /**
-     * Sets the tolerance for block padding as a percentage of stripe size.
-     */
-    public WriterOptions paddingTolerance(double value) {
-      paddingTolerance = value;
-      return this;
-    }
-
-    /**
-     * Comma-separated list of column names for which bloom filters are to be created.
-     */
-    public WriterOptions bloomFilterColumns(String columns) {
-      bloomFilterColumns = columns;
-      return this;
-    }
-
-    /**
-     * Specify the false positive probability for bloom filter.
-     * @param fpp - false positive probability
-     * @return this
-     */
-    public WriterOptions bloomFilterFpp(double fpp) {
-      bloomFilterFpp = fpp;
-      return this;
-    }
-
-    /**
-     * Sets the generic compression that is used to compress the data.
-     */
-    public WriterOptions compress(CompressionKind value) {
-      compressValue = value;
-      return this;
-    }
-
-    /**
-     * Set the schema for the file. This is a required parameter.
-     * @param schema the schema for the file.
-     * @return this
-     */
-    public WriterOptions setSchema(TypeDescription schema) {
-      this.schema = schema;
-      return this;
-    }
-
-    /**
-     * Sets the version of the file that will be written.
-     */
-    public WriterOptions version(Version value) {
-      versionValue = value;
-      return this;
-    }
-
-    /**
-     * Add a listener for when the stripe and file are about to be closed.
-     * @param callback the object to be called when the stripe is closed
-     * @return this
-     */
-    public WriterOptions callback(WriterCallback callback) {
-      this.callback = callback;
-      return this;
-    }
-
-    /**
-     * A package local option to set the memory manager.
-     */
-    protected WriterOptions memory(MemoryManager value) {
-      memoryManagerValue = value;
-      return this;
-    }
-
-    public boolean getBlockPadding() {
-      return blockPaddingValue;
-    }
-
-    public long getBlockSize() {
-      return blockSizeValue;
-    }
-
-    public String getBloomFilterColumns() {
-      return bloomFilterColumns;
-    }
-
-    public FileSystem getFileSystem() {
-      return fileSystemValue;
-    }
-
-    public Configuration getConfiguration() {
-      return configuration;
-    }
-
-    public TypeDescription getSchema() {
-      return schema;
-    }
-
-    public long getStripeSize() {
-      return stripeSizeValue;
-    }
-
-    public CompressionKind getCompress() {
-      return compressValue;
-    }
-
-    public WriterCallback getCallback() {
-      return callback;
-    }
-
-    public Version getVersion() {
-      return versionValue;
-    }
-
-    public MemoryManager getMemoryManager() {
-      return memoryManagerValue;
-    }
-
-    public int getBufferSize() {
-      return bufferSizeValue;
-    }
-
-    public boolean isEnforceBufferSize() {
-      return enforceBufferSize;
-    }
-
-    public int getRowIndexStride() {
-      return rowIndexStrideValue;
-    }
-
-    public CompressionStrategy getCompressionStrategy() {
-      return compressionStrategy;
-    }
-
-    public EncodingStrategy getEncodingStrategy() {
-      return encodingStrategy;
-    }
-
-    public double getPaddingTolerance() {
-      return paddingTolerance;
-    }
-
-    public double getBloomFilterFpp() {
-      return bloomFilterFpp;
-    }
-  }
-
-  /**
-   * Create a set of writer options based on a configuration.
-   * @param conf the configuration to use for values
-   * @return A WriterOptions object that can be modified
-   */
-  public static WriterOptions writerOptions(Configuration conf) {
-    return new WriterOptions(null, conf);
-  }
-
-  /**
-   * Create a set of write options based on a set of table properties and
-   * configuration.
-   * @param tableProperties the properties of the table
-   * @param conf the configuration of the query
-   * @return a WriterOptions object that can be modified
-   */
-  public static WriterOptions writerOptions(Properties tableProperties,
-                                            Configuration conf) {
-    return new WriterOptions(tableProperties, conf);
-  }
-
-  private static ThreadLocal<MemoryManager> memoryManager = null;
-
-  private static synchronized MemoryManager getStaticMemoryManager(
-      final Configuration conf) {
-    if (memoryManager == null) {
-      memoryManager = new ThreadLocal<MemoryManager>() {
-        @Override
-        protected MemoryManager initialValue() {
-          return new MemoryManager(conf);
-        }
-      };
-    }
-    return memoryManager.get();
-  }
-
-  /**
-   * Create an ORC file writer. This is the public interface for creating
-   * writers going forward and new options will only be added to this method.
-   * @param path filename to write to
-   * @param opts the options
-   * @return a new ORC file writer
-   * @throws IOException
-   */
-  public static Writer createWriter(Path path,
-                                    WriterOptions opts
-                                    ) throws IOException {
-    FileSystem fs = opts.getFileSystem() == null ?
-        path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
-
-    return new WriterImpl(fs, path, opts);
-  }
-
-}
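For context, a hedged sketch of the factory methods and fluent option builders above; the path, schema, and option values are illustrative, and imports from org.apache.orc and org.apache.hadoop are omitted:

    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.orc");        // illustrative path

    TypeDescription schema = TypeDescription.createStruct()
        .addField("id", TypeDescription.createLong())
        .addField("name", TypeDescription.createString());

    Writer writer = OrcFile.createWriter(path,
        OrcFile.writerOptions(conf)
            .setSchema(schema)                       // required parameter
            .stripeSize(64L * 1024 * 1024)
            .compress(CompressionKind.ZLIB)
            .version(OrcFile.Version.V_0_12));
    // ... add rows through the Writer API, then close the file ...
    writer.close();

    Reader reader = OrcFile.createReader(path,
        OrcFile.readerOptions(conf).maxLength(Long.MAX_VALUE));
    System.out.println("rows = " + reader.getNumberOfRows());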

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcUtils.java b/orc/src/java/org/apache/orc/OrcUtils.java
deleted file mode 100644
index 4f02926..0000000
--- a/orc/src/java/org/apache/orc/OrcUtils.java
+++ /dev/null
@@ -1,624 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.orc.OrcProto.Type.Builder;
-import org.apache.orc.impl.ReaderImpl;
-
-import com.google.common.collect.Lists;
-
-public class OrcUtils {
-
-  /**
-   * Returns the selected columns as a boolean array, with true set for the specified column names.
-   * The result contains a number of elements equal to the flattened number of columns.
-   * For example:
-   * selectedColumns - a,b,c
-   * allColumns - a,b,c,d
-   * If column c is a complex type, say list<string>, and the other types are primitives, then the
-   * result will be [false, true, true, true, true, false].
-   * Index 0 is the root element of the struct, which is set to false by default; indexes 1 and 2
-   * correspond to columns a and b. Indexes 3 and 4 correspond to column c, which is list<string>
-   * and flattens to 2 columns, and index 5 corresponds to column d.
-   *
-   * @param selectedColumns - comma separated list of selected column names
-   * @param schema       - object schema
-   * @return - boolean array with true value set for the specified column names
-   */
-  public static boolean[] includeColumns(String selectedColumns,
-                                         TypeDescription schema) {
-    int numFlattenedCols = schema.getMaximumId();
-    boolean[] results = new boolean[numFlattenedCols + 1];
-    if ("*".equals(selectedColumns)) {
-      Arrays.fill(results, true);
-      return results;
-    }
-    if (selectedColumns != null &&
-        schema.getCategory() == TypeDescription.Category.STRUCT) {
-      List<String> fieldNames = schema.getFieldNames();
-      List<TypeDescription> fields = schema.getChildren();
-      for (String column: selectedColumns.split((","))) {
-        TypeDescription col = findColumn(column, fieldNames, fields);
-        if (col != null) {
-          for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
-            results[i] = true;
-          }
-        }
-      }
-    }
-    return results;
-  }
-
-  private static TypeDescription findColumn(String columnName,
-                                            List<String> fieldNames,
-                                            List<TypeDescription> fields) {
-    int i = 0;
-    for(String fieldName: fieldNames) {
-      if (fieldName.equalsIgnoreCase(columnName)) {
-        return fields.get(i);
-      } else {
-        i += 1;
-      }
-    }
-    return null;
-  }
-
-  public static List<OrcProto.Type> getOrcTypes(TypeDescription typeDescr) {
-    List<OrcProto.Type> result = Lists.newArrayList();
-    appendOrcTypes(result, typeDescr);
-    return result;
-  }
-
-  private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr) {
-    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
-    List<TypeDescription> children = typeDescr.getChildren();
-    switch (typeDescr.getCategory()) {
-    case BOOLEAN:
-      type.setKind(OrcProto.Type.Kind.BOOLEAN);
-      break;
-    case BYTE:
-      type.setKind(OrcProto.Type.Kind.BYTE);
-      break;
-    case SHORT:
-      type.setKind(OrcProto.Type.Kind.SHORT);
-      break;
-    case INT:
-      type.setKind(OrcProto.Type.Kind.INT);
-      break;
-    case LONG:
-      type.setKind(OrcProto.Type.Kind.LONG);
-      break;
-    case FLOAT:
-      type.setKind(OrcProto.Type.Kind.FLOAT);
-      break;
-    case DOUBLE:
-      type.setKind(OrcProto.Type.Kind.DOUBLE);
-      break;
-    case STRING:
-      type.setKind(OrcProto.Type.Kind.STRING);
-      break;
-    case CHAR:
-      type.setKind(OrcProto.Type.Kind.CHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case VARCHAR:
-      type.setKind(OrcProto.Type.Kind.VARCHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case BINARY:
-      type.setKind(OrcProto.Type.Kind.BINARY);
-      break;
-    case TIMESTAMP:
-      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-      break;
-    case DATE:
-      type.setKind(OrcProto.Type.Kind.DATE);
-      break;
-    case DECIMAL:
-      type.setKind(OrcProto.Type.Kind.DECIMAL);
-      type.setPrecision(typeDescr.getPrecision());
-      type.setScale(typeDescr.getScale());
-      break;
-    case LIST:
-      type.setKind(OrcProto.Type.Kind.LIST);
-      type.addSubtypes(children.get(0).getId());
-      break;
-    case MAP:
-      type.setKind(OrcProto.Type.Kind.MAP);
-      for(TypeDescription t: children) {
-        type.addSubtypes(t.getId());
-      }
-      break;
-    case STRUCT:
-      type.setKind(OrcProto.Type.Kind.STRUCT);
-      for(TypeDescription t: children) {
-        type.addSubtypes(t.getId());
-      }
-      for(String field: typeDescr.getFieldNames()) {
-        type.addFieldNames(field);
-      }
-      break;
-    case UNION:
-      type.setKind(OrcProto.Type.Kind.UNION);
-      for(TypeDescription t: children) {
-        type.addSubtypes(t.getId());
-      }
-      break;
-    default:
-      throw new IllegalArgumentException("Unknown category: " +
-          typeDescr.getCategory());
-    }
-    result.add(type.build());
-    if (children != null) {
-      for(TypeDescription child: children) {
-        appendOrcTypes(result, child);
-      }
-    }
-  }
-
-  /**
-   * NOTE: This method ignores the subtype numbers in the TypeDescription and rebuilds the
-   * subtype numbers based on the length of the result list being appended.
-   *
-   * @param result
-   * @param typeDescr
-   */
-  public static void appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
-      TypeDescription typeDescr) {
-
-    int subtype = result.size();
-    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
-    boolean needsAdd = true;
-    List<TypeDescription> children = typeDescr.getChildren();
-    switch (typeDescr.getCategory()) {
-    case BOOLEAN:
-      type.setKind(OrcProto.Type.Kind.BOOLEAN);
-      break;
-    case BYTE:
-      type.setKind(OrcProto.Type.Kind.BYTE);
-      break;
-    case SHORT:
-      type.setKind(OrcProto.Type.Kind.SHORT);
-      break;
-    case INT:
-      type.setKind(OrcProto.Type.Kind.INT);
-      break;
-    case LONG:
-      type.setKind(OrcProto.Type.Kind.LONG);
-      break;
-    case FLOAT:
-      type.setKind(OrcProto.Type.Kind.FLOAT);
-      break;
-    case DOUBLE:
-      type.setKind(OrcProto.Type.Kind.DOUBLE);
-      break;
-    case STRING:
-      type.setKind(OrcProto.Type.Kind.STRING);
-      break;
-    case CHAR:
-      type.setKind(OrcProto.Type.Kind.CHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case VARCHAR:
-      type.setKind(OrcProto.Type.Kind.VARCHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case BINARY:
-      type.setKind(OrcProto.Type.Kind.BINARY);
-      break;
-    case TIMESTAMP:
-      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-      break;
-    case DATE:
-      type.setKind(OrcProto.Type.Kind.DATE);
-      break;
-    case DECIMAL:
-      type.setKind(OrcProto.Type.Kind.DECIMAL);
-      type.setPrecision(typeDescr.getPrecision());
-      type.setScale(typeDescr.getScale());
-      break;
-    case LIST:
-      type.setKind(OrcProto.Type.Kind.LIST);
-      type.addSubtypes(++subtype);
-      result.add(type.build());
-      needsAdd = false;
-      appendOrcTypesRebuildSubtypes(result, children.get(0));
-      break;
-    case MAP:
-      {
-        // Make room for MAP type.
-        result.add(null);
-  
-        // Add MAP type pair in order to determine their subtype values.
-        appendOrcTypesRebuildSubtypes(result, children.get(0));
-        int subtype2 = result.size();
-        appendOrcTypesRebuildSubtypes(result, children.get(1));
-        type.setKind(OrcProto.Type.Kind.MAP);
-        type.addSubtypes(subtype + 1);
-        type.addSubtypes(subtype2);
-        result.set(subtype, type.build());
-        needsAdd = false;
-      }
-      break;
-    case STRUCT:
-      {
-        List<String> fieldNames = typeDescr.getFieldNames();
-
-        // Make room for STRUCT type.
-        result.add(null);
-
-        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
-        for(TypeDescription child: children) {
-          int fieldSubtype = result.size();
-          fieldSubtypes.add(fieldSubtype);
-          appendOrcTypesRebuildSubtypes(result, child);
-        }
-
-        type.setKind(OrcProto.Type.Kind.STRUCT);
-
-        for (int i = 0 ; i < fieldNames.size(); i++) {
-          type.addSubtypes(fieldSubtypes.get(i));
-          type.addFieldNames(fieldNames.get(i));
-        }
-        result.set(subtype, type.build());
-        needsAdd = false;
-      }
-      break;
-    case UNION:
-      {
-        // Make room for UNION type.
-        result.add(null);
-
-        List<Integer> unionSubtypes = new ArrayList<Integer>(children.size());
-        for(TypeDescription child: children) {
-          int unionSubtype = result.size();
-          unionSubtypes.add(unionSubtype);
-          appendOrcTypesRebuildSubtypes(result, child);
-        }
-
-        type.setKind(OrcProto.Type.Kind.UNION);
-        for (int i = 0 ; i < children.size(); i++) {
-          type.addSubtypes(unionSubtypes.get(i));
-        }
-        result.set(subtype, type.build());
-        needsAdd = false;
-      }
-      break;
-    default:
-      throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory());
-    }
-    if (needsAdd) {
-      result.add(type.build());
-    }
-  }
-
-  /**
-   * NOTE: This method ignores the subtype numbers in the OrcProto.Type and rebuilds the
-   * subtype numbers based on the length of the result list being appended.
-   *
-   * @param result
-   * @param types
-   * @param columnId
-   */
-  public static int appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
-      List<OrcProto.Type> types, int columnId) {
-
-    OrcProto.Type oldType = types.get(columnId++);
-
-    int subtype = result.size();
-    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
-    boolean needsAdd = true;
-    switch (oldType.getKind()) {
-    case BOOLEAN:
-      builder.setKind(OrcProto.Type.Kind.BOOLEAN);
-      break;
-    case BYTE:
-      builder.setKind(OrcProto.Type.Kind.BYTE);
-      break;
-    case SHORT:
-      builder.setKind(OrcProto.Type.Kind.SHORT);
-      break;
-    case INT:
-      builder.setKind(OrcProto.Type.Kind.INT);
-      break;
-    case LONG:
-      builder.setKind(OrcProto.Type.Kind.LONG);
-      break;
-    case FLOAT:
-      builder.setKind(OrcProto.Type.Kind.FLOAT);
-      break;
-    case DOUBLE:
-      builder.setKind(OrcProto.Type.Kind.DOUBLE);
-      break;
-    case STRING:
-      builder.setKind(OrcProto.Type.Kind.STRING);
-      break;
-    case CHAR:
-      builder.setKind(OrcProto.Type.Kind.CHAR);
-      builder.setMaximumLength(oldType.getMaximumLength());
-      break;
-    case VARCHAR:
-      builder.setKind(OrcProto.Type.Kind.VARCHAR);
-      builder.setMaximumLength(oldType.getMaximumLength());
-      break;
-    case BINARY:
-      builder.setKind(OrcProto.Type.Kind.BINARY);
-      break;
-    case TIMESTAMP:
-      builder.setKind(OrcProto.Type.Kind.TIMESTAMP);
-      break;
-    case DATE:
-      builder.setKind(OrcProto.Type.Kind.DATE);
-      break;
-    case DECIMAL:
-      builder.setKind(OrcProto.Type.Kind.DECIMAL);
-      builder.setPrecision(oldType.getPrecision());
-      builder.setScale(oldType.getScale());
-      break;
-    case LIST:
-      builder.setKind(OrcProto.Type.Kind.LIST);
-      builder.addSubtypes(++subtype);
-      result.add(builder.build());
-      needsAdd = false;
-      columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-      break;
-    case MAP:
-      {
-        // Make room for MAP type.
-        result.add(null);
-  
-        // Add MAP type pair in order to determine their subtype values.
-        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        int subtype2 = result.size();
-        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        builder.setKind(OrcProto.Type.Kind.MAP);
-        builder.addSubtypes(subtype + 1);
-        builder.addSubtypes(subtype2);
-        result.set(subtype, builder.build());
-        needsAdd = false;
-      }
-      break;
-    case STRUCT:
-      {
-        List<String> fieldNames = oldType.getFieldNamesList();
-
-        // Make room for STRUCT type.
-        result.add(null);
-
-        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
-        for(int i = 0 ; i < fieldNames.size(); i++) {
-          int fieldSubtype = result.size();
-          fieldSubtypes.add(fieldSubtype);
-          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        }
-
-        builder.setKind(OrcProto.Type.Kind.STRUCT);
-
-        for (int i = 0 ; i < fieldNames.size(); i++) {
-          builder.addSubtypes(fieldSubtypes.get(i));
-          builder.addFieldNames(fieldNames.get(i));
-        }
-        result.set(subtype, builder.build());
-        needsAdd = false;
-      }
-      break;
-    case UNION:
-      {
-        int subtypeCount = oldType.getSubtypesCount();
-
-        // Make room for UNION type.
-        result.add(null);
-
-        List<Integer> unionSubtypes = new ArrayList<Integer>(subtypeCount);
-        for(int i = 0 ; i < subtypeCount; i++) {
-          int unionSubtype = result.size();
-          unionSubtypes.add(unionSubtype);
-          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        }
-
-        builder.setKind(OrcProto.Type.Kind.UNION);
-        for (int i = 0 ; i < subtypeCount; i++) {
-          builder.addSubtypes(unionSubtypes.get(i));
-        }
-        result.set(subtype, builder.build());
-        needsAdd = false;
-      }
-      break;
-    default:
-      throw new IllegalArgumentException("Unknown category: " + oldType.getKind());
-    }
-    if (needsAdd) {
-      result.add(builder.build());
-    }
-    return columnId;
-  }
-
-  /**
-   * Translate the given rootColumn from the list of types to a TypeDescription.
-   * @param types all of the types
-   * @param rootColumn translate this type
-   * @return a new TypeDescription that matches the given rootColumn
-   */
-  public static
-        TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
-                                                int rootColumn) {
-    OrcProto.Type type = types.get(rootColumn);
-    switch (type.getKind()) {
-      case BOOLEAN:
-        return TypeDescription.createBoolean();
-      case BYTE:
-        return TypeDescription.createByte();
-      case SHORT:
-        return TypeDescription.createShort();
-      case INT:
-        return TypeDescription.createInt();
-      case LONG:
-        return TypeDescription.createLong();
-      case FLOAT:
-        return TypeDescription.createFloat();
-      case DOUBLE:
-        return TypeDescription.createDouble();
-      case STRING:
-        return TypeDescription.createString();
-      case CHAR:
-      case VARCHAR: {
-        TypeDescription result = type.getKind() == OrcProto.Type.Kind.CHAR ?
-            TypeDescription.createChar() : TypeDescription.createVarchar();
-        if (type.hasMaximumLength()) {
-          result.withMaxLength(type.getMaximumLength());
-        }
-        return result;
-      }
-      case BINARY:
-        return TypeDescription.createBinary();
-      case TIMESTAMP:
-        return TypeDescription.createTimestamp();
-      case DATE:
-        return TypeDescription.createDate();
-      case DECIMAL: {
-        TypeDescription result = TypeDescription.createDecimal();
-        if (type.hasScale()) {
-          result.withScale(type.getScale());
-        }
-        if (type.hasPrecision()) {
-          result.withPrecision(type.getPrecision());
-        }
-        return result;
-      }
-      case LIST:
-        return TypeDescription.createList(
-            convertTypeFromProtobuf(types, type.getSubtypes(0)));
-      case MAP:
-        return TypeDescription.createMap(
-            convertTypeFromProtobuf(types, type.getSubtypes(0)),
-            convertTypeFromProtobuf(types, type.getSubtypes(1)));
-      case STRUCT: {
-        TypeDescription result = TypeDescription.createStruct();
-        for(int f=0; f < type.getSubtypesCount(); ++f) {
-          result.addField(type.getFieldNames(f),
-              convertTypeFromProtobuf(types, type.getSubtypes(f)));
-        }
-        return result;
-      }
-      case UNION: {
-        TypeDescription result = TypeDescription.createUnion();
-        for(int f=0; f < type.getSubtypesCount(); ++f) {
-          result.addUnionChild(
-              convertTypeFromProtobuf(types, type.getSubtypes(f)));
-        }
-        return result;
-      }
-    }
-    throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
-  }
-
-  public static List<StripeInformation> convertProtoStripesToStripes(
-      List<OrcProto.StripeInformation> stripes) {
-    List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
-    for (OrcProto.StripeInformation info : stripes) {
-      result.add(new ReaderImpl.StripeInformationImpl(info));
-    }
-    return result;
-  }
-
-  public static List<TypeDescription> setTypeBuilderFromSchema(
-      OrcProto.Type.Builder type, TypeDescription schema) {
-    List<TypeDescription> children = schema.getChildren();
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-        type.setKind(OrcProto.Type.Kind.BOOLEAN);
-        break;
-      case BYTE:
-        type.setKind(OrcProto.Type.Kind.BYTE);
-        break;
-      case SHORT:
-        type.setKind(OrcProto.Type.Kind.SHORT);
-        break;
-      case INT:
-        type.setKind(OrcProto.Type.Kind.INT);
-        break;
-      case LONG:
-        type.setKind(OrcProto.Type.Kind.LONG);
-        break;
-      case FLOAT:
-        type.setKind(OrcProto.Type.Kind.FLOAT);
-        break;
-      case DOUBLE:
-        type.setKind(OrcProto.Type.Kind.DOUBLE);
-        break;
-      case STRING:
-        type.setKind(OrcProto.Type.Kind.STRING);
-        break;
-      case CHAR:
-        type.setKind(OrcProto.Type.Kind.CHAR);
-        type.setMaximumLength(schema.getMaxLength());
-        break;
-      case VARCHAR:
-        type.setKind(OrcProto.Type.Kind.VARCHAR);
-        type.setMaximumLength(schema.getMaxLength());
-        break;
-      case BINARY:
-        type.setKind(OrcProto.Type.Kind.BINARY);
-        break;
-      case TIMESTAMP:
-        type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-        break;
-      case DATE:
-        type.setKind(OrcProto.Type.Kind.DATE);
-        break;
-      case DECIMAL:
-        type.setKind(OrcProto.Type.Kind.DECIMAL);
-        type.setPrecision(schema.getPrecision());
-        type.setScale(schema.getScale());
-        break;
-      case LIST:
-        type.setKind(OrcProto.Type.Kind.LIST);
-        type.addSubtypes(children.get(0).getId());
-        break;
-      case MAP:
-        type.setKind(OrcProto.Type.Kind.MAP);
-        for(TypeDescription t: children) {
-          type.addSubtypes(t.getId());
-        }
-        break;
-      case STRUCT:
-        type.setKind(OrcProto.Type.Kind.STRUCT);
-        for(TypeDescription t: children) {
-          type.addSubtypes(t.getId());
-        }
-        for(String field: schema.getFieldNames()) {
-          type.addFieldNames(field);
-        }
-        break;
-      case UNION:
-        type.setKind(OrcProto.Type.Kind.UNION);
-        for(TypeDescription t: children) {
-          type.addSubtypes(t.getId());
-        }
-        break;
-      default:
-        throw new IllegalArgumentException("Unknown category: " +
-          schema.getCategory());
-    }
-    return children;
-  }
-}
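For context, a small sketch of the column-pruning helper above, mirroring its javadoc example; the field names are illustrative and imports from org.apache.orc are omitted:

    // Flattened column selection for struct<a:int,b:string,c:array<string>,d:int>.
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", TypeDescription.createString())
        .addField("c", TypeDescription.createList(TypeDescription.createString()))
        .addField("d", TypeDescription.createInt());

    // Column ids after flattening: 0=root, 1=a, 2=b, 3=c, 4=c's element, 5=d.
    boolean[] included = OrcUtils.includeColumns("a,c", schema);
    // included == [false, true, false, true, true, false]

    // "*" selects every flattened column, including the root.
    boolean[] all = OrcUtils.includeColumns("*", schema);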

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Reader.java b/orc/src/java/org/apache/orc/Reader.java
deleted file mode 100644
index c2d5235..0000000
--- a/orc/src/java/org/apache/orc/Reader.java
+++ /dev/null
@@ -1,375 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-
-/**
- * The interface for reading ORC files.
- *
- * One Reader can support multiple concurrent RecordReaders.
- */
-public interface Reader {
-
-  /**
-   * Get the number of rows in the file.
-   * @return the number of rows
-   */
-  long getNumberOfRows();
-
-  /**
-   * Get the deserialized data size of the file
-   * @return raw data size
-   */
-  long getRawDataSize();
-
-  /**
-   * Get the deserialized data size of the specified columns
-   * @param colNames
-   * @return raw data size of columns
-   */
-  long getRawDataSizeOfColumns(List<String> colNames);
-
-  /**
-   * Get the deserialized data size of the specified column ids
-   * @param colIds - internal column id (check orcfiledump for column ids)
-   * @return raw data size of columns
-   */
-  long getRawDataSizeFromColIndices(List<Integer> colIds);
-
-  /**
-   * Get the user metadata keys.
-   * @return the set of metadata keys
-   */
-  List<String> getMetadataKeys();
-
-  /**
-   * Get a user metadata value.
-   * @param key a key given by the user
-   * @return the bytes associated with the given key
-   */
-  ByteBuffer getMetadataValue(String key);
-
-  /**
-   * Did the user set the given metadata value?
-   * @param key the key to check
-   * @return true if the metadata value was set
-   */
-  boolean hasMetadataValue(String key);
-
-  /**
-   * Get the compression kind.
-   * @return the kind of compression in the file
-   */
-  CompressionKind getCompressionKind();
-
-  /**
-   * Get the buffer size for the compression.
-   * @return number of bytes to buffer for the compression codec.
-   */
-  int getCompressionSize();
-
-  /**
-   * Get the number of rows per an entry in the row index.
-   * @return the number of rows per an entry in the row index or 0 if there
-   * is no row index.
-   */
-  int getRowIndexStride();
-
-  /**
-   * Get the list of stripes.
-   * @return the information about the stripes in order
-   */
-  List<StripeInformation> getStripes();
-
-  /**
-   * Get the length of the file.
-   * @return the number of bytes in the file
-   */
-  long getContentLength();
-
-  /**
-   * Get the statistics about the columns in the file.
-   * @return the information about the columns
-   */
-  ColumnStatistics[] getStatistics();
-
-  /**
-   * Get the type of rows in this ORC file.
-   */
-  TypeDescription getSchema();
-
-  /**
-   * Get the list of types contained in the file. The root type is the first
-   * type in the list.
-   * @return the list of flattened types
-   * @deprecated use getSchema instead
-   */
-  List<OrcProto.Type> getTypes();
-
-  /**
-   * Get the file format version.
-   */
-  OrcFile.Version getFileVersion();
-
-  /**
-   * Get the version of the writer of this file.
-   */
-  OrcFile.WriterVersion getWriterVersion();
-
-  /**
-   * Get the file tail (footer + postscript)
-   *
-   * @return - file tail
-   */
-  OrcProto.FileTail getFileTail();
-
-  /**
-   * Options for creating a RecordReader.
-   */
-  public static class Options {
-    private boolean[] include;
-    private long offset = 0;
-    private long length = Long.MAX_VALUE;
-    private SearchArgument sarg = null;
-    private String[] columnNames = null;
-    private Boolean useZeroCopy = null;
-    private Boolean skipCorruptRecords = null;
-    private TypeDescription schema = null;
-    private DataReader dataReader = null;
-
-    /**
-     * Set the list of columns to read.
-     * @param include a list of columns to read
-     * @return this
-     */
-    public Options include(boolean[] include) {
-      this.include = include;
-      return this;
-    }
-
-    /**
-     * Set the range of bytes to read
-     * @param offset the starting byte offset
-     * @param length the number of bytes to read
-     * @return this
-     */
-    public Options range(long offset, long length) {
-      this.offset = offset;
-      this.length = length;
-      return this;
-    }
-
-    /**
-     * Set the schema on read type description.
-     */
-    public Options schema(TypeDescription schema) {
-      this.schema = schema;
-      return this;
-    }
-
-    /**
-     * Set search argument for predicate push down.
-     * @param sarg the search argument
-     * @param columnNames the column names for the search argument
-     * @return this
-     */
-    public Options searchArgument(SearchArgument sarg, String[] columnNames) {
-      this.sarg = sarg;
-      this.columnNames = columnNames;
-      return this;
-    }
-
-    /**
-     * Set whether to use zero copy from HDFS.
-     * @param value the new zero copy flag
-     * @return this
-     */
-    public Options useZeroCopy(boolean value) {
-      this.useZeroCopy = value;
-      return this;
-    }
-
-    public Options dataReader(DataReader value) {
-      this.dataReader = value;
-      return this;
-    }
-
-    /**
-     * Set whether to skip corrupt records.
-     * @param value the new skip corrupt records flag
-     * @return this
-     */
-    public Options skipCorruptRecords(boolean value) {
-      this.skipCorruptRecords = value;
-      return this;
-    }
-
-    public boolean[] getInclude() {
-      return include;
-    }
-
-    public long getOffset() {
-      return offset;
-    }
-
-    public long getLength() {
-      return length;
-    }
-
-    public TypeDescription getSchema() {
-      return schema;
-    }
-
-    public SearchArgument getSearchArgument() {
-      return sarg;
-    }
-
-    public String[] getColumnNames() {
-      return columnNames;
-    }
-
-    public long getMaxOffset() {
-      long result = offset + length;
-      if (result < 0) {
-        result = Long.MAX_VALUE;
-      }
-      return result;
-    }
-
-    public Boolean getUseZeroCopy() {
-      return useZeroCopy;
-    }
-
-    public Boolean getSkipCorruptRecords() {
-      return skipCorruptRecords;
-    }
-
-    public DataReader getDataReader() {
-      return dataReader;
-    }
-
-    public Options clone() {
-      Options result = new Options();
-      result.include = include;
-      result.offset = offset;
-      result.length = length;
-      result.sarg = sarg;
-      result.schema = schema;
-      result.columnNames = columnNames;
-      result.useZeroCopy = useZeroCopy;
-      result.skipCorruptRecords = skipCorruptRecords;
-      result.dataReader = dataReader == null ? null : dataReader.clone();
-      return result;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buffer = new StringBuilder();
-      buffer.append("{include: ");
-      if (include == null) {
-        buffer.append("null");
-      } else {
-        buffer.append("[");
-        for(int i=0; i < include.length; ++i) {
-          if (i != 0) {
-            buffer.append(", ");
-          }
-          buffer.append(include[i]);
-        }
-        buffer.append("]");
-      }
-      buffer.append(", offset: ");
-      buffer.append(offset);
-      buffer.append(", length: ");
-      buffer.append(length);
-      if (sarg != null) {
-        buffer.append(", sarg: ");
-        buffer.append(sarg.toString());
-        buffer.append(", columns: [");
-        for(int i=0; i < columnNames.length; ++i) {
-          if (i != 0) {
-            buffer.append(", ");
-          }
-          buffer.append("'");
-          buffer.append(columnNames[i]);
-          buffer.append("'");
-        }
-        buffer.append("]");
-      }
-      if (schema != null) {
-        buffer.append(", schema: ");
-        schema.printToBuffer(buffer);
-      }
-      buffer.append("}");
-      return buffer.toString();
-    }
-  }
-
-  /**
-   * Create a RecordReader that reads everything with the default options.
-   * @return a new RecordReader
-   * @throws IOException
-   */
-  RecordReader rows() throws IOException;
-
-  /**
-   * Create a RecordReader that uses the options given.
-   * This method can't be named rows, because many callers used rows(null)
-   * before the rows() method was introduced.
-   * @param options the options to read with
-   * @return a new RecordReader
-   * @throws IOException
-   */
-  RecordReader rows(Options options) throws IOException;
-
-  /**
-   * @return the list of integers representing the version of the file, in order from major to minor.
-   */
-  List<Integer> getVersionList();
-
-  /**
-   * @return the size of the file metadata, in bytes.
-   */
-  int getMetadataSize();
-
-  /**
-   * @return Stripe statistics, in original protobuf form.
-   */
-  List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
-
-  /**
-   * @return Stripe statistics.
-   */
-  List<StripeStatistics> getStripeStatistics() throws IOException;
-
-  /**
-   * @return File statistics, in original protobuf form.
-   */
-  List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
-
-  /**
-   * @return Serialized file metadata read from disk for the purposes of caching, etc.
-   */
-  ByteBuffer getSerializedFileFooter();
-}
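
For reference, a minimal sketch of how this Reader/Options API is typically driven. It assumes OrcFile.createReader, OrcFile.readerOptions and TypeDescription's getMaximumId/createRowBatch from elsewhere in this module; the file path and the choice of included columns are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class ReaderOptionsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Reader reader = OrcFile.createReader(new Path(args[0]),
        OrcFile.readerOptions(conf));

    // Include the root struct (id 0) and the first child column (id 1) only.
    boolean[] include = new boolean[reader.getSchema().getMaximumId() + 1];
    include[0] = true;
    include[1] = true;

    Reader.Options options = new Reader.Options()
        .include(include)
        .range(0, 128L * 1024 * 1024)      // only stripes starting in the first 128 MB
        .skipCorruptRecords(true);

    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    RecordReader rows = reader.rows(options);
    while (rows.nextBatch(batch)) {
      System.out.println("read a batch of " + batch.size + " rows");
    }
    rows.close();
  }
}

The include array is indexed by column id (column 0 is the root struct), which is why it is sized from getMaximumId() + 1.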

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/RecordReader.java b/orc/src/java/org/apache/orc/RecordReader.java
deleted file mode 100644
index 09ba0f0..0000000
--- a/orc/src/java/org/apache/orc/RecordReader.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * A row-by-row iterator for ORC files.
- */
-public interface RecordReader {
-  /**
-   * Read the next row batch. The size of the batch cannot be
-   * controlled by the caller. Callers must check
-   * VectorizedRowBatch.size on the returned batch to find out how
-   * many rows were read.
-   * @param batch a row batch object to read into
-   * @return whether more rows were available to read
-   * @throws java.io.IOException
-   */
-  boolean nextBatch(VectorizedRowBatch batch) throws IOException;
-
-  /**
-   * Get the row number of the first row that will be returned by the
-   * next call to nextBatch().
-   * @return the row number from 0 to the number of rows in the file
-   * @throws java.io.IOException
-   */
-  long getRowNumber() throws IOException;
-
-  /**
-   * Get the progress of the reader through the rows.
-   * @return a fraction between 0.0 and 1.0 of rows read
-   * @throws java.io.IOException
-   */
-  float getProgress() throws IOException;
-
-  /**
-   * Release the resources associated with this reader.
-   * @throws java.io.IOException
-   */
-  void close() throws IOException;
-
-  /**
-   * Seek to a particular row number.
-   */
-  void seekToRow(long rowCount) throws IOException;
-}
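
A hedged sketch of the nextBatch() loop this interface implies, reading a single column through the vectorized batch. It assumes the reader was opened as in the earlier sketch and that column 0 of the batch is a LongColumnVector:

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class RecordReaderSketch {
  /** Sum the first column of every row, assuming it is a long column. */
  static long sumFirstColumn(Reader reader) throws IOException {
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    RecordReader rows = reader.rows();
    long total = 0;
    while (rows.nextBatch(batch)) {          // batch.size says how many rows were filled
      LongColumnVector col = (LongColumnVector) batch.cols[0];
      for (int r = 0; r < batch.size; ++r) {
        int i = col.isRepeating ? 0 : r;     // repeating vectors hold a single value
        if (col.noNulls || !col.isNull[i]) {
          total += col.vector[i];
        }
      }
    }
    rows.close();
    return total;
  }
}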

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StringColumnStatistics.java b/orc/src/java/org/apache/orc/StringColumnStatistics.java
deleted file mode 100644
index 5a868d0..0000000
--- a/orc/src/java/org/apache/orc/StringColumnStatistics.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for string columns.
- */
-public interface StringColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum string.
-   * @return the minimum
-   */
-  String getMinimum();
-
-  /**
-   * Get the maximum string.
-   * @return the maximum
-   */
-  String getMaximum();
-
-  /**
-   * Get the total length of all strings in the column.
-   * @return the sum (total length)
-   */
-  long getSum();
-}
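
A small sketch of reading these statistics back from a Reader; it assumes the reader was opened as in the earlier sketches and simply downcasts the per-column entries returned by getStatistics():

import org.apache.orc.ColumnStatistics;
import org.apache.orc.Reader;
import org.apache.orc.StringColumnStatistics;

public class StringStatsSketch {
  static void printStringStats(Reader reader) {
    ColumnStatistics[] stats = reader.getStatistics();   // one entry per column id
    for (int col = 0; col < stats.length; ++col) {
      if (stats[col] instanceof StringColumnStatistics) {
        StringColumnStatistics s = (StringColumnStatistics) stats[col];
        System.out.println("column " + col + ": min='" + s.getMinimum()
            + "' max='" + s.getMaximum() + "' total length=" + s.getSum());
      }
    }
  }
}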

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StripeInformation.java b/orc/src/java/org/apache/orc/StripeInformation.java
deleted file mode 100644
index 38f7eba..0000000
--- a/orc/src/java/org/apache/orc/StripeInformation.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-/**
- * Information about the stripes in an ORC file that is provided by the Reader.
- */
-public interface StripeInformation {
-  /**
-   * Get the byte offset of the start of the stripe.
-   * @return the offset in bytes from the start of the file
-   */
-  long getOffset();
-
-  /**
-   * Get the total length of the stripe in bytes.
-   * @return the number of bytes in the stripe
-   */
-  long getLength();
-
-  /**
-   * Get the length of the stripe's indexes.
-   * @return the number of bytes in the index
-   */
-  long getIndexLength();
-
-  /**
-   * Get the length of the stripe's data.
-   * @return the number of bytes in the stripe
-   */
-  long getDataLength();
-
-  /**
-   * Get the length of the stripe's footer section, which describes its streams and encodings.
-   * @return the number of bytes in the tail
-   */
-  long getFooterLength();
-
-  /**
-   * Get the number of rows in the stripe.
-   * @return a count of the number of rows
-   */
-  long getNumberOfRows();
-}
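
A sketch of dumping the physical stripe layout. It assumes the Reader also exposes the stripe list via getStripes(), as the stock org.apache.orc.Reader does (that method is not part of this diff):

import org.apache.orc.Reader;
import org.apache.orc.StripeInformation;

public class StripeLayoutSketch {
  static void printStripeLayout(Reader reader) {
    for (StripeInformation stripe : reader.getStripes()) {
      System.out.printf("stripe @%d: %d rows, index=%d data=%d footer=%d total=%d%n",
          stripe.getOffset(), stripe.getNumberOfRows(), stripe.getIndexLength(),
          stripe.getDataLength(), stripe.getFooterLength(), stripe.getLength());
    }
  }
}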

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StripeStatistics.java b/orc/src/java/org/apache/orc/StripeStatistics.java
deleted file mode 100644
index 8fc91cb..0000000
--- a/orc/src/java/org/apache/orc/StripeStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.orc.impl.ColumnStatisticsImpl;
-
-import java.util.List;
-
-public class StripeStatistics {
-  private final List<OrcProto.ColumnStatistics> cs;
-
-  public StripeStatistics(List<OrcProto.ColumnStatistics> list) {
-    this.cs = list;
-  }
-
-  /**
-   * Return the list of column statistics for this stripe.
-   *
-   * @return the column statistics
-   */
-  public ColumnStatistics[] getColumnStatistics() {
-    ColumnStatistics[] result = new ColumnStatistics[cs.size()];
-    for (int i = 0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
-    }
-    return result;
-  }
-}
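
A sketch of per-stripe statistics access through Reader.getStripeStatistics(); it assumes the reader was opened as in the earlier sketches and that column 0 (the root struct) reports the stripe's row count via getNumberOfValues():

import java.io.IOException;
import java.util.List;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.Reader;
import org.apache.orc.StripeStatistics;

public class StripeStatsSketch {
  static void printRowCountsPerStripe(Reader reader) throws IOException {
    List<StripeStatistics> stripeStats = reader.getStripeStatistics();
    for (int i = 0; i < stripeStats.size(); ++i) {
      ColumnStatistics[] cols = stripeStats.get(i).getColumnStatistics();
      // Column 0 is the root struct, so its value count is the stripe's row count.
      System.out.println("stripe " + i + ": " + cols[0].getNumberOfValues() + " rows");
    }
  }
}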

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java b/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
deleted file mode 100644
index 27dc49f..0000000
--- a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.sql.Timestamp;
-
-/**
- * Statistics for Timestamp columns.
- */
-public interface TimestampColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Timestamp getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Timestamp getMaximum();
-}
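
A sketch of a coarse pruning check built on these statistics. The column id passed in is purely illustrative, and it assumes the reader was opened as in the earlier sketches:

import java.sql.Timestamp;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.Reader;
import org.apache.orc.TimestampColumnStatistics;

public class TimestampPruneSketch {
  /** Could the file contain rows whose timestamp column (id tsColumnId) is at or after cutoff? */
  static boolean mightContainRowsAtOrAfter(Reader reader, int tsColumnId, Timestamp cutoff) {
    ColumnStatistics stats = reader.getStatistics()[tsColumnId];
    if (stats instanceof TimestampColumnStatistics) {
      Timestamp max = ((TimestampColumnStatistics) stats).getMaximum();
      return max == null || !max.before(cutoff);
    }
    return true;   // unknown statistics: cannot rule the file out
  }
}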

