hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject [36/43] hive git commit: HIVE-11253. Move SearchArgument and VectorizedRowBatch classes to storage-api. (omalley reviewed by prasanthj)
Date Fri, 31 Jul 2015 00:43:36 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
deleted file mode 100644
index 3a92565..0000000
--- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.List;
-
-/**
- * The primitive predicates that form a SearchArgument.
- */
-public interface PredicateLeaf {
-
-  /**
-   * The possible operators for predicates. To get the opposites, construct
-   * an expression with a not operator.
-   */
-  public static enum Operator {
-    EQUALS,
-    NULL_SAFE_EQUALS,
-    LESS_THAN,
-    LESS_THAN_EQUALS,
-    IN,
-    BETWEEN,
-    IS_NULL
-  }
-
-  /**
-   * The possible types for sargs.
-   */
-  public static enum Type {
-    INTEGER(Integer.class), // all of the integer types except long
-    LONG(Long.class),
-    FLOAT(Double.class),   // float and double
-    STRING(String.class),  // string, char, varchar
-    DATE(Date.class),
-    DECIMAL(HiveDecimalWritable.class),
-    TIMESTAMP(Timestamp.class),
-    BOOLEAN(Boolean.class);
-
-    private final Class cls;
-    Type(Class cls) {
-      this.cls = cls;
-    }
-
-    /**
-     * For all SARG leaves, the values must be the matching class.
-     * @return the value class
-     */
-    public Class getValueClass() {
-      return cls;
-    }
-  }
-
-  /**
-   * Get the operator for the leaf.
-   */
-  public Operator getOperator();
-
-  /**
-   * Get the type of the column and literal by the file format.
-   */
-  public Type getType();
-
-  /**
-   * Get the simple column name.
-   * @return the column name
-   */
-  public String getColumnName();
-
-  /**
-   * Get the literal half of the predicate leaf. Adapt the original type for what orc needs
-   *
-   * @return an Integer, Long, Double, or String
-   */
-  public Object getLiteral();
-
-  /**
-   * For operators with multiple literals (IN and BETWEEN), get the literals.
-   *
-   * @return the list of literals (Integer, Longs, Doubles, or Strings)
-   *
-   */
-  public List<Object> getLiteralList();
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
deleted file mode 100644
index bc0d503..0000000
--- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
+++ /dev/null
@@ -1,298 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.sarg;
-
-import java.util.List;
-
-/**
- * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable">
- *   SearchArgument</a>, which are the subset of predicates
- * that can be pushed down to the RecordReader. Each SearchArgument consists
- * of a series of SearchClauses that must each be true for the row to be
- * accepted by the filter.
- *
- * This requires that the filter be normalized into conjunctive normal form
- * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>).
- */
-public interface SearchArgument {
-
-  /**
-   * The potential result sets of logical operations.
-   */
-  public static enum TruthValue {
-    YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL;
-
-    /**
-     * Compute logical or between the two values.
-     * @param right the other argument or null
-     * @return the result
-     */
-    public TruthValue or(TruthValue right) {
-      if (right == null || right == this) {
-        return this;
-      }
-      if (right == YES || this == YES) {
-        return YES;
-      }
-      if (right == YES_NULL || this == YES_NULL) {
-        return YES_NULL;
-      }
-      if (right == NO) {
-        return this;
-      }
-      if (this == NO) {
-        return right;
-      }
-      if (this == NULL) {
-        if (right == NO_NULL) {
-          return NULL;
-        } else {
-          return YES_NULL;
-        }
-      }
-      if (right == NULL) {
-        if (this == NO_NULL) {
-          return NULL;
-        } else {
-          return YES_NULL;
-        }
-      }
-      return YES_NO_NULL;
-    }
-
-    /**
-     * Compute logical AND between the two values.
-     * @param right the other argument or null
-     * @return the result
-     */
-    public TruthValue and(TruthValue right) {
-      if (right == null || right == this) {
-        return this;
-      }
-      if (right == NO || this == NO) {
-        return NO;
-      }
-      if (right == NO_NULL || this == NO_NULL) {
-        return NO_NULL;
-      }
-      if (right == YES) {
-        return this;
-      }
-      if (this == YES) {
-        return right;
-      }
-      if (this == NULL) {
-        if (right == YES_NULL) {
-          return NULL;
-        } else {
-          return NO_NULL;
-        }
-      }
-      if (right == NULL) {
-        if (this == YES_NULL) {
-          return NULL;
-        } else {
-          return NO_NULL;
-        }
-      }
-      return YES_NO_NULL;
-    }
-
-    public TruthValue not() {
-      switch (this) {
-        case NO:
-          return YES;
-        case YES:
-          return NO;
-        case NULL:
-        case YES_NO:
-        case YES_NO_NULL:
-          return this;
-        case NO_NULL:
-          return YES_NULL;
-        case YES_NULL:
-          return NO_NULL;
-        default:
-          throw new IllegalArgumentException("Unknown value: " + this);
-      }
-    }
-
-    /**
-     * Does the RecordReader need to include this set of records?
-     * @return true unless none of the rows qualify
-     */
-    public boolean isNeeded() {
-      switch (this) {
-        case NO:
-        case NULL:
-        case NO_NULL:
-          return false;
-        default:
-          return true;
-      }
-    }
-  }
-
-  /**
-   * Get the leaf predicates that are required to evaluate the predicate. The
-   * list will have the duplicates removed.
-   * @return the list of leaf predicates
-   */
-  public List<PredicateLeaf> getLeaves();
-
-  /**
-   * Get the expression tree. This should only be needed for file formats that
-   * need to translate the expression to an internal form.
-   */
-  public ExpressionTree getExpression();
- 
-  /**
-   * Evaluate the entire predicate based on the values for the leaf predicates.
-   * @param leaves the value of each leaf predicate
-   * @return the value of the entire predicate
-   */
-  public TruthValue evaluate(TruthValue[] leaves);
-
-  /**
-   * Serialize the SARG as a Kryo object and return the base64 string.
-   *
-   * Hive should replace the current XML-based AST serialization for predicate pushdown
-   * with the Kryo serialization of the SARG because the representation is much more
-   * compact and focused on what is needed for predicate pushdown.
-   *
-   * @return the serialized SARG
-   */
-  public String toKryo();
-
-  /**
-   * A builder object for contexts outside of Hive where it isn't easy to
-   * get an ExprNodeDesc. The user must call startOr, startAnd, or startNot
-   * before adding any leaves.
-   */
-  public interface Builder {
-
-    /**
-     * Start building an or operation and push it on the stack.
-     * @return this
-     */
-    public Builder startOr();
-
-    /**
-     * Start building an and operation and push it on the stack.
-     * @return this
-     */
-    public Builder startAnd();
-
-    /**
-     * Start building a not operation and push it on the stack.
-     * @return this
-     */
-    public Builder startNot();
-
-    /**
-     * Finish the current operation and pop it off of the stack. Each start
-     * call must have a matching end.
-     * @return this
-     */
-    public Builder end();
-
-    /**
-     * Add a less than leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @param literal the literal
-     * @return this
-     */
-    public Builder lessThan(String column, PredicateLeaf.Type type,
-                            Object literal);
-
-    /**
-     * Add a less than equals leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @param literal the literal
-     * @return this
-     */
-    public Builder lessThanEquals(String column, PredicateLeaf.Type type,
-                                  Object literal);
-
-    /**
-     * Add an equals leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @param literal the literal
-     * @return this
-     */
-    public Builder equals(String column, PredicateLeaf.Type type,
-                          Object literal);
-
-    /**
-     * Add a null safe equals leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @param literal the literal
-     * @return this
-     */
-    public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
-                                  Object literal);
-
-    /**
-     * Add an in leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @param literal the literal
-     * @return this
-     */
-    public Builder in(String column, PredicateLeaf.Type type,
-                      Object... literal);
-
-    /**
-     * Add an is null leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @return this
-     */
-    public Builder isNull(String column, PredicateLeaf.Type type);
-
-    /**
-     * Add a between leaf to the current item on the stack.
-     * @param column the name of the column
-     * @param type the type of the expression
-     * @param lower the lower bound literal
-     * @param upper the upper bound literal
-     * @return this
-     */
-    public Builder between(String column, PredicateLeaf.Type type,
-                           Object lower, Object upper);
-
-    /**
-     * Add a truth value to the expression.
-     * @param truth the truth value to add
-     * @return this
-     */
-    public Builder literal(TruthValue truth);
-
-    /**
-     * Build and return the SearchArgument that has been defined. All of the
-     * starts must have been ended before this call.
-     * @return the new SearchArgument
-     */
-    public SearchArgument build();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
deleted file mode 100644
index 0578d24..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.serde2.io;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.math.BigInteger;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
-
-public class HiveDecimalWritable implements WritableComparable<HiveDecimalWritable> {
-
-  static final private Log LOG = LogFactory.getLog(HiveDecimalWritable.class);
-
-  private byte[] internalStorage = new byte[0];
-  private int scale;
-
-  public HiveDecimalWritable() {
-  }
-
-  public HiveDecimalWritable(String value) {
-    set(HiveDecimal.create(value));
-  }
-
-  public HiveDecimalWritable(byte[] bytes, int scale) {
-    set(bytes, scale);
-  }
-
-  public HiveDecimalWritable(HiveDecimalWritable writable) {
-    set(writable.getHiveDecimal());
-  }
-
-  public HiveDecimalWritable(HiveDecimal value) {
-    set(value);
-  }
-
-  public HiveDecimalWritable(long value) {
-    set((HiveDecimal.create(value)));
-  }
-
-  public void set(HiveDecimal value) {
-    set(value.unscaledValue().toByteArray(), value.scale());
-  }
-
-  public void set(HiveDecimal value, int maxPrecision, int maxScale) {
-    set(HiveDecimal.enforcePrecisionScale(value, maxPrecision, maxScale));
-  }
-
-  public void set(HiveDecimalWritable writable) {
-    set(writable.getHiveDecimal());
-  }
-
-  public void set(byte[] bytes, int scale) {
-    this.internalStorage = bytes;
-    this.scale = scale;
-  }
-
-  public HiveDecimal getHiveDecimal() {
-    return HiveDecimal.create(new BigInteger(internalStorage), scale);
-  }
-
-  /**
-   * Get a HiveDecimal instance from the writable and constrain it to the maximum precision/scale.
-   *
-   * @param maxPrecision maximum precision
-   * @param maxScale maximum scale
-   * @return HiveDecimal instance
-   */
-  public HiveDecimal getHiveDecimal(int maxPrecision, int maxScale) {
-     return HiveDecimal.enforcePrecisionScale(HiveDecimal.
-             create(new BigInteger(internalStorage), scale),
-         maxPrecision, maxScale);
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    scale = WritableUtils.readVInt(in);
-    int byteArrayLen = WritableUtils.readVInt(in);
-    if (internalStorage.length != byteArrayLen) {
-      internalStorage = new byte[byteArrayLen];
-    }
-    in.readFully(internalStorage);
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    WritableUtils.writeVInt(out, scale);
-    WritableUtils.writeVInt(out, internalStorage.length);
-    out.write(internalStorage);
-  }
-
-  @Override
-  public int compareTo(HiveDecimalWritable that) {
-    return getHiveDecimal().compareTo(that.getHiveDecimal());
-  }
-
-  @Override
-  public String toString() {
-    return getHiveDecimal().toString();
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    if (other == null || getClass() != other.getClass()) {
-      return false;
-    }
-    HiveDecimalWritable bdw = (HiveDecimalWritable) other;
-
-    // 'equals' and 'compareTo' are not compatible with HiveDecimals. We want
-    // compareTo which returns 0 iff the numbers are equal (e.g.: 3.14 is
-    // the same as 3.140). 'Equals' returns true iff equal and the same scale
-    // is set in the decimals (e.g.: 3.14 is not the same as 3.140)
-    return getHiveDecimal().compareTo(bdw.getHiveDecimal()) == 0;
-  }
-
-  @Override
-  public int hashCode() {
-    return getHiveDecimal().hashCode();
-  }
-
-  /* (non-Javadoc)
-   * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the
-   * internal storage bytes and scale.  
-   * @return
-   */
-  public byte[] getInternalStorage() {
-    return internalStorage;
-  }
-  
-  /* (non-Javadoc)
-   * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the
-   * internal storage bytes and scale.  
-   */
-  public int getScale() {
-    return scale;
-  }
-
-  public static
-  HiveDecimalWritable enforcePrecisionScale(HiveDecimalWritable writable,
-                                            int precision, int scale) {
-    if (writable == null) {
-      return null;
-    }
-
-    HiveDecimal dec =
-        HiveDecimal.enforcePrecisionScale(writable.getHiveDecimal(), precision,
-            scale);
-    return dec == null ? null : new HiveDecimalWritable(dec);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/pom.xml
----------------------------------------------------------------------
diff --git a/storage-api/pom.xml b/storage-api/pom.xml
new file mode 100644
index 0000000..71b51b8
--- /dev/null
+++ b/storage-api/pom.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hive</groupId>
+    <artifactId>hive</artifactId>
+    <version>2.0.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>hive-storage-api</artifactId>
+  <packaging>jar</packaging>
+  <name>Hive Storage API</name>
+
+  <properties>
+    <hive.path.to.root>..</hive.path.to.root>
+  </properties>
+
+  <dependencies>
+    <!-- dependencies are always listed in sorted order by groupId, artifactId -->
+    <!-- inter-project -->
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>${log4j.version}</version>
+    </dependency>
+    <!-- test inter-project -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <profiles>
+    <profile>
+      <id>hadoop-1</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+          <version>${hadoop-20S.version}</version>
+         <optional>true</optional>
+        </dependency>
+      </dependencies>
+    </profile>
+   <profile>
+      <id>hadoop-2</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-23.version}</version>
+          <optional>true</optional>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
+  <build>
+    <sourceDirectory>${basedir}/src/java</sourceDirectory>
+    <testSourceDirectory>${basedir}/src/test</testSourceDirectory>
+    <testResources>
+      <testResource>
+        <directory>${basedir}/src/test/resources</directory>
+      </testResource>
+    </testResources>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
new file mode 100644
index 0000000..7d7fb28
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
@@ -0,0 +1,312 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.RoundingMode;
+
+/**
+ *
+ * HiveDecimal. Simple wrapper for BigDecimal. Adds fixed max precision and non scientific string
+ * representation
+ *
+ */
+public class HiveDecimal implements Comparable<HiveDecimal> {
+  public static final int MAX_PRECISION = 38;
+  public static final int MAX_SCALE = 38;
+
+  /**
+   * Default precision/scale when user doesn't specify in the column metadata, such as
+   * decimal and decimal(8).
+   */
+  public static final int USER_DEFAULT_PRECISION = 10;
+  public static final int USER_DEFAULT_SCALE = 0;
+
+  /**
+   *  Default precision/scale when system is not able to determine them, such as in case
+   *  of a non-generic udf.
+   */
+  public static final int SYSTEM_DEFAULT_PRECISION = 38;
+  public static final int SYSTEM_DEFAULT_SCALE = 18;
+
+  public static final HiveDecimal ZERO = new HiveDecimal(BigDecimal.ZERO);
+  public static final HiveDecimal ONE = new HiveDecimal(BigDecimal.ONE);
+
+  public static final int ROUND_FLOOR = BigDecimal.ROUND_FLOOR;
+  public static final int ROUND_CEILING = BigDecimal.ROUND_CEILING;
+  public static final int ROUND_HALF_UP = BigDecimal.ROUND_HALF_UP;
+
+  private BigDecimal bd = BigDecimal.ZERO;
+
+  private HiveDecimal(BigDecimal bd) {
+    this.bd = bd;
+  }
+
+  public static HiveDecimal create(BigDecimal b) {
+    return create(b, true);
+  }
+
+  public static HiveDecimal create(BigDecimal b, boolean allowRounding) {
+    BigDecimal bd = normalize(b, allowRounding);
+    return bd == null ? null : new HiveDecimal(bd);
+  }
+
+  public static HiveDecimal create(BigInteger unscaled, int scale) {
+    BigDecimal bd = normalize(new BigDecimal(unscaled, scale), true);
+    return bd == null ? null : new HiveDecimal(bd);
+  }
+
+  public static HiveDecimal create(String dec) {
+    BigDecimal bd;
+    try {
+      bd = new BigDecimal(dec.trim());
+    } catch (NumberFormatException ex) {
+      return null;
+    }
+
+    bd = normalize(bd, true);
+    return bd == null ? null : new HiveDecimal(bd);
+  }
+
+  public static HiveDecimal create(BigInteger bi) {
+    BigDecimal bd = normalize(new BigDecimal(bi), true);
+    return bd == null ? null : new HiveDecimal(bd);
+  }
+
+  public static HiveDecimal create(int i) {
+    return new HiveDecimal(new BigDecimal(i));
+  }
+
+  public static HiveDecimal create(long l) {
+    return new HiveDecimal(new BigDecimal(l));
+  }
+
+  @Override
+  public String toString() {
+     return bd.toPlainString();
+  }
+
+  public HiveDecimal setScale(int i) {
+    return new HiveDecimal(bd.setScale(i, RoundingMode.HALF_UP));
+  }
+
+  @Override
+  public int compareTo(HiveDecimal dec) {
+    return bd.compareTo(dec.bd);
+  }
+
+  @Override
+  public int hashCode() {
+    return bd.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || obj.getClass() != getClass()) {
+      return false;
+    }
+    return bd.equals(((HiveDecimal) obj).bd);
+  }
+
+  public int scale() {
+    return bd.scale();
+  }
+
+  /**
+   * Returns the number of digits (integer and fractional) in the number, which is equivalent
+   * to SQL decimal precision. Note that this is different from BigDecimal.precision(),
+   * which returns the precision of the unscaled value (BigDecimal.valueOf(0.01).precision() = 1,
+   * whereas HiveDecimal.create("0.01").precision() = 2).
+   * If you want the BigDecimal precision, use HiveDecimal.bigDecimalValue().precision()
+   * @return the number of digits in the number, i.e. its SQL decimal precision
+   */
+  public int precision() {
+    int bdPrecision = bd.precision();
+    int bdScale = bd.scale();
+
+    if (bdPrecision < bdScale) {
+      // This can happen for numbers less than 0.1
+      // For 0.001234: bdPrecision=4, bdScale=6
+      // In this case, we'll set the type to have the same precision as the scale.
+      return bdScale;
+    }
+    return bdPrecision;
+  }
+
+  public int intValue() {
+    return bd.intValue();
+  }
+
+  public double doubleValue() {
+    return bd.doubleValue();
+  }
+
+  public long longValue() {
+    return bd.longValue();
+  }
+
+  public short shortValue() {
+    return bd.shortValue();
+  }
+
+  public float floatValue() {
+    return bd.floatValue();
+  }
+
+  public BigDecimal bigDecimalValue() {
+    return bd;
+  }
+
+  public byte byteValue() {
+    return bd.byteValue();
+  }
+
+  public HiveDecimal setScale(int adjustedScale, int rm) {
+    return create(bd.setScale(adjustedScale, rm));
+  }
+
+  public HiveDecimal subtract(HiveDecimal dec) {
+    return create(bd.subtract(dec.bd));
+  }
+
+  public HiveDecimal multiply(HiveDecimal dec) {
+    return create(bd.multiply(dec.bd), false);
+  }
+
+  public BigInteger unscaledValue() {
+    return bd.unscaledValue();
+  }
+
+  public HiveDecimal scaleByPowerOfTen(int n) {
+    return create(bd.scaleByPowerOfTen(n));
+  }
+
+  public HiveDecimal abs() {
+    return create(bd.abs());
+  }
+
+  public HiveDecimal negate() {
+    return create(bd.negate());
+  }
+
+  public HiveDecimal add(HiveDecimal dec) {
+    return create(bd.add(dec.bd));
+  }
+
+  public HiveDecimal pow(int n) {
+    BigDecimal result = normalize(bd.pow(n), false);
+    return result == null ? null : new HiveDecimal(result);
+  }
+
+  public HiveDecimal remainder(HiveDecimal dec) {
+    return create(bd.remainder(dec.bd));
+  }
+
+  public HiveDecimal divide(HiveDecimal dec) {
+    return create(bd.divide(dec.bd, MAX_SCALE, RoundingMode.HALF_UP), true);
+  }
+
+  /**
+   * Get the sign of the underlying decimal.
+   * @return 0 if the decimal is equal to 0, -1 if less than zero, and 1 if greater than 0
+   */
+  public int signum() {
+    return bd.signum();
+  }
+
+  private static BigDecimal trim(BigDecimal d) {
+    if (d.compareTo(BigDecimal.ZERO) == 0) {
+      // Special case for 0, because java doesn't strip zeros correctly on that number.
+      d = BigDecimal.ZERO;
+    } else {
+      d = d.stripTrailingZeros();
+      if (d.scale() < 0) {
+        // no negative scale decimals
+        d = d.setScale(0);
+      }
+    }
+    return d;
+  }
+
+  private static BigDecimal normalize(BigDecimal bd, boolean allowRounding) {
+    if (bd == null) {
+      return null;
+    }
+
+    bd = trim(bd);
+
+    int intDigits = bd.precision() - bd.scale();
+
+    if (intDigits > MAX_PRECISION) {
+      return null;
+    }
+
+    int maxScale = Math.min(MAX_SCALE, Math.min(MAX_PRECISION - intDigits, bd.scale()));
+    if (bd.scale() > maxScale ) {
+      if (allowRounding) {
+        bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
+        // Trimming is again necessary, because rounding may introduce new trailing 0's.
+        bd = trim(bd);
+      } else {
+        bd = null;
+      }
+    }
+
+    return bd;
+  }
+
+  public static BigDecimal enforcePrecisionScale(BigDecimal bd, int maxPrecision, int maxScale) {
+    if (bd == null) {
+      return null;
+    }
+
+    bd = trim(bd);
+
+    if (bd.scale() > maxScale) {
+      bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
+    }
+
+    int maxIntDigits = maxPrecision - maxScale;
+    int intDigits = bd.precision() - bd.scale();
+    if (intDigits > maxIntDigits) {
+      return null;
+    }
+
+    return bd;
+  }
+
+  public static HiveDecimal enforcePrecisionScale(HiveDecimal dec, int maxPrecision, int maxScale) {
+    if (dec == null) {
+      return null;
+    }
+
+    // Minor optimization, avoiding creating new objects.
+    if (dec.precision() - dec.scale() <= maxPrecision - maxScale &&
+        dec.scale() <= maxScale) {
+      return dec;
+    }
+
+    BigDecimal bd = enforcePrecisionScale(dec.bd, maxPrecision, maxScale);
+    if (bd == null) {
+      return null;
+    }
+
+    return HiveDecimal.create(bd);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
new file mode 100644
index 0000000..02c52fa
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -0,0 +1,322 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * This class supports string and binary data by value reference -- i.e. each field is
+ * explicitly present, as opposed to provided by a dictionary reference.
+ * In some cases, all the values will be in the same byte array to begin with,
+ * but this need not be the case. If each value is in a separate byte
+ * array to start with, or not all of the values are in the same original
+ * byte array, you can still assign data by reference into this column vector.
+ * This gives flexibility to use this in multiple situations.
+ * <p>
+ * When setting data by reference, the caller
+ * is responsible for allocating the byte arrays used to hold the data.
+ * You can also set data by value, as long as you call the initBuffer() method first.
+ * You can mix "by value" and "by reference" in the same column vector,
+ * though that use is probably not typical.
+ */
+public class BytesColumnVector extends ColumnVector {
+  public byte[][] vector;
+  public int[] start;          // start offset of each field
+
+  /*
+   * The length of each field. If the value repeats for every entry, then it is stored
+   * in vector[0] and isRepeating from the superclass is set to true.
+   */
+  public int[] length;
+  private byte[] buffer;   // optional buffer to use when actually copying in data
+  private int nextFree;    // next free position in buffer
+
+  // Estimate that there will be 16 bytes per entry
+  static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE;
+
+  // Proportion of extra space to provide when allocating more buffer space.
+  static final float EXTRA_SPACE_FACTOR = (float) 1.2;
+
+  /**
+   * Use this constructor for normal operation.
+   * All column vectors should be the default size normally.
+   */
+  public BytesColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't call this constructor except for testing purposes.
+   *
+   * @param size  number of elements in the column vector
+   */
+  public BytesColumnVector(int size) {
+    super(size);
+    vector = new byte[size][];
+    start = new int[size];
+    length = new int[size];
+  }
+
+  /**
+   * Additional reset work for BytesColumnVector (releasing scratch bytes for by value strings).
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    initBuffer(0);
+  }
+
+  /** Set a field by reference.
+   *
+   * @param elementNum index within column vector to set
+   * @param sourceBuf container of source data
+   * @param start start byte position within source
+   * @param length  length of source byte sequence
+   */
+  public void setRef(int elementNum, byte[] sourceBuf, int start, int length) {
+    vector[elementNum] = sourceBuf;
+    this.start[elementNum] = start;
+    this.length[elementNum] = length;
+  }
+
+  /**
+   * You must call initBuffer first before using setVal().
+   * Provide the estimated number of bytes needed to hold
+   * a full column vector worth of byte string data.
+   *
+   * @param estimatedValueSize  Estimated size of buffer space needed
+   */
+  public void initBuffer(int estimatedValueSize) {
+    nextFree = 0;
+
+    // if buffer is already allocated, keep using it, don't re-allocate
+    if (buffer != null) {
+      return;
+    }
+
+    // allocate a little extra space to limit need to re-allocate
+    int bufferSize = this.vector.length * (int)(estimatedValueSize * EXTRA_SPACE_FACTOR);
+    if (bufferSize < DEFAULT_BUFFER_SIZE) {
+      bufferSize = DEFAULT_BUFFER_SIZE;
+    }
+    buffer = new byte[bufferSize];
+  }
+
+  /**
+   * Initialize buffer to default size.
+   */
+  public void initBuffer() {
+    initBuffer(0);
+  }
+
+  /**
+   * @return amount of buffer space currently allocated
+   */
+  public int bufferSize() {
+    if (buffer == null) {
+      return 0;
+    }
+    return buffer.length;
+  }
+
+  /**
+   * Set a field by actually copying in to a local buffer.
+   * If you must actually copy data in to the array, use this method.
+   * DO NOT USE this method unless it's not practical to set data by reference with setRef().
+   * Setting data by reference tends to run a lot faster than copying data in.
+   *
+   * @param elementNum index within column vector to set
+   * @param sourceBuf container of source data
+   * @param start start byte position within source
+   * @param length  length of source byte sequence
+   */
+  public void setVal(int elementNum, byte[] sourceBuf, int start, int length) {
+    if ((nextFree + length) > buffer.length) {
+      increaseBufferSpace(length);
+    }
+    System.arraycopy(sourceBuf, start, buffer, nextFree, length);
+    vector[elementNum] = buffer;
+    this.start[elementNum] = nextFree;
+    this.length[elementNum] = length;
+    nextFree += length;
+  }
+
+  /**
+   * Set a field to the concatenation of two string values. Result data is copied
+   * into the internal buffer.
+   *
+   * @param elementNum index within column vector to set
+   * @param leftSourceBuf container of left argument
+   * @param leftStart start of left argument
+   * @param leftLen length of left argument
+   * @param rightSourceBuf container of right argument
+   * @param rightStart start of right argument
+   * @param rightLen length of right argument
+   */
+  public void setConcat(int elementNum, byte[] leftSourceBuf, int leftStart, int leftLen,
+      byte[] rightSourceBuf, int rightStart, int rightLen) {
+    int newLen = leftLen + rightLen;
+    if ((nextFree + newLen) > buffer.length) {
+      increaseBufferSpace(newLen);
+    }
+    vector[elementNum] = buffer;
+    this.start[elementNum] = nextFree;
+    this.length[elementNum] = newLen;
+
+    System.arraycopy(leftSourceBuf, leftStart, buffer, nextFree, leftLen);
+    nextFree += leftLen;
+    System.arraycopy(rightSourceBuf, rightStart, buffer, nextFree, rightLen);
+    nextFree += rightLen;
+  }
+
+  /**
+   * Increase buffer space enough to accommodate next element.
+   * This uses an exponential increase mechanism to rapidly
+   * grow the buffer until it is large enough to hold all data.
+   * As batches get re-loaded, buffer space allocated will quickly
+   * stabilize.
+   *
+   * @param nextElemLength size of next element to be added
+   */
+  public void increaseBufferSpace(int nextElemLength) {
+
+    // Keep doubling buffer size until there will be enough space for next element.
+    int newLength = 2 * buffer.length;
+    while((nextFree + nextElemLength) > newLength) {
+      newLength *= 2;
+    }
+
+    // Allocate new buffer, copy data to it, and set buffer to new buffer.
+    byte[] newBuffer = new byte[newLength];
+    System.arraycopy(buffer, 0, newBuffer, 0, nextFree);
+    buffer = newBuffer;
+  }
+
+  /** Copy the current object contents into the output. Only copy selected entries,
+    * as indicated by selectedInUse and the sel array.
+    */
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, BytesColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.setVal(0, vector[0], start[0], length[0]);
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.setVal(i, vector[i], start[i], length[i]);
+      }
+    }
+    else {
+      for (int i = 0; i < size; i++) {
+        output.setVal(i, vector[i], start[i], length[i]);
+      }
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  /** Simplify vector by brute-force flattening noNulls and isRepeating
+    * This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+    * with many arguments, at the expense of loss of some performance.
+    */
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+
+      // setRef is used below and this is safe, because the reference
+      // is to data owned by this column vector. If this column vector
+      // gets re-used, the whole thing is re-used together so there
+      // is no danger of a dangling reference.
+
+      // Only copy data values if entry is not null. The string value
+      // at position 0 is undefined if the position 0 value is null.
+      if (noNulls || !isNull[0]) {
+
+        // loops start at position 1 because position 0 is already set
+        if (selectedInUse) {
+          for (int j = 1; j < size; j++) {
+            int i = sel[j];
+            this.setRef(i, vector[0], start[0], length[0]);
+          }
+        } else {
+          for (int i = 1; i < size; i++) {
+            this.setRef(i, vector[0], start[0], length[0]);
+          }
+        }
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  // Fill all the vector entries with the provided value
+  public void fill(byte[] value) {
+    noNulls = true;
+    isRepeating = true;
+    setRef(0, value, 0, value.length);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    BytesColumnVector in = (BytesColumnVector) inputVector;
+    setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]);
+  }
+
+  @Override
+  public void init() {
+    initBuffer(0);
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append('"');
+      buffer.append(new String(this.buffer, start[row], length[row]));
+      buffer.append('"');
+    } else {
+      buffer.append("null");
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
new file mode 100644
index 0000000..cb75c2c
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * ColumnVector contains the shared structure for the sub-types,
+ * including NULL information, and whether this vector
+ * repeats, i.e. has all values the same, so only the first
+ * one is set. This is used to accelerate query performance
+ * by handling a whole vector in O(1) time when applicable.
+ *
+ * The fields are public by design since this is a performance-critical
+ * structure that is used in the inner loop of query execution.
+ */
+public abstract class ColumnVector {
+
+  /*
+   * The current kinds of column vectors.
+   */
+  public static enum Type {
+    LONG,
+    DOUBLE,
+    BYTES,
+    DECIMAL
+  }
+
+  /*
+   * If noNulls is false, then this array contains true if the value
+   * is null, otherwise false. The array is always allocated, so a batch can be re-used
+   * later and nulls added.
+   */
+  public boolean[] isNull;
+
+  // If the whole column vector has no nulls, this is true, otherwise false.
+  public boolean noNulls;
+
+  /*
+   * True if same value repeats for whole column vector.
+   * If so, vector[0] holds the repeating value.
+   */
+  public boolean isRepeating;
+
+  // Variables to hold state from before flattening so it can be easily restored.
+  private boolean preFlattenIsRepeating;
+  private boolean preFlattenNoNulls;
+
+  /**
+   * Constructor for super-class ColumnVector. This is not called directly,
+   * but used to initialize inherited fields.
+   *
+   * @param len Vector length
+   */
+  public ColumnVector(int len) {
+    isNull = new boolean[len];
+    noNulls = true;
+    isRepeating = false;
+  }
+
+  /**
+     * Resets the column to default state
+     *  - fills the isNull array with false
+     *  - sets noNulls to true
+     *  - sets isRepeating to false
+     */
+    public void reset() {
+      if (false == noNulls) {
+        Arrays.fill(isNull, false);
+      }
+      noNulls = true;
+      isRepeating = false;
+    }
+
+    abstract public void flatten(boolean selectedInUse, int[] sel, int size);
+
+    // Simplify vector by brute-force flattening noNulls if isRepeating
+    // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+    // with many arguments.
+    public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) {
+
+      boolean nullFillValue;
+
+      if (noNulls) {
+        nullFillValue = false;
+      } else {
+        nullFillValue = isNull[0];
+      }
+
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          isNull[i] = nullFillValue;
+        }
+      } else {
+        Arrays.fill(isNull, 0, size, nullFillValue);
+      }
+
+      // all nulls are now explicit
+      noNulls = false;
+    }
+
+    public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) {
+      if (noNulls) {
+        noNulls = false;
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            isNull[i] = false;
+          }
+        } else {
+          Arrays.fill(isNull, 0, size, false);
+        }
+      }
+    }
+
+    /**
+     * Restore the state of isRepeating and noNulls to what it was
+     * before flattening. This must only be called just after flattening
+     * and then evaluating a VectorExpression on the column vector.
+     * It is an optimization that allows other operations on the same
+     * column to continue to benefit from the isRepeating and noNulls
+     * indicators.
+     */
+    public void unFlatten() {
+      isRepeating = preFlattenIsRepeating;
+      noNulls = preFlattenNoNulls;
+    }
+
+    // Record repeating and no nulls state to be restored later.
+    protected void flattenPush() {
+      preFlattenIsRepeating = isRepeating;
+      preFlattenNoNulls = noNulls;
+    }
+
+    /**
+     * Set the element in this column vector from the given input vector.
+     */
+    public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector);
+
+    /**
+     * Initialize the column vector. This method can be overridden by specific column vector types.
+     * Use this method only if the individual type of the column vector is not known, otherwise it is
+     * preferable to call specific initialization methods.
+     */
+    public void init() {
+      // Do nothing by default
+    }
+
+    /**
+     * Print the value for this column into the given string builder.
+     * @param buffer the buffer to print into
+     * @param row the id of the row to print
+     */
+    public abstract void stringifyValue(StringBuilder buffer,
+                                        int row);
+  }

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
new file mode 100644
index 0000000..74a9d5f
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+public class DecimalColumnVector extends ColumnVector {
+
+  /**
+   * A vector of HiveDecimalWritable objects.
+   *
+   * For high performance and easy access to this low-level structure,
+   * the fields are public by design (as they are in other ColumnVector
+   * types).
+   */
+  public HiveDecimalWritable[] vector;
+  public short scale;
+  public short precision;
+
+  public DecimalColumnVector(int precision, int scale) {
+    this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
+  }
+
+  public DecimalColumnVector(int size, int precision, int scale) {
+    super(size);
+    this.precision = (short) precision;
+    this.scale = (short) scale;
+    vector = new HiveDecimalWritable[size];
+    for (int i = 0; i < size; i++) {
+      vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+    }
+  }
+
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    // TODO Auto-generated method stub
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    HiveDecimal hiveDec = ((DecimalColumnVector) inputVector).vector[inputElementNum].getHiveDecimal(precision, scale);
+    if (hiveDec == null) {
+      noNulls = false;
+      isNull[outElementNum] = true;
+    } else {
+      vector[outElementNum].set(hiveDec);
+    }
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row].toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  public void set(int elementNum, HiveDecimalWritable writeable) {
+    HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale);
+    if (hiveDec == null) {
+      noNulls = false;
+      isNull[elementNum] = true;
+    } else {
+      vector[elementNum].set(hiveDec);
+    }
+  }
+
+  public void set(int elementNum, HiveDecimal hiveDec) {
+    HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
+    if (checkedDec == null) {
+      noNulls = false;
+      isNull[elementNum] = true;
+    } else {
+      vector[elementNum].set(checkedDec);
+    }
+  }
+
+  public void setNullDataValue(int elementNum) {
+    // E.g. For scale 2 the minimum is "0.01"
+    HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale);
+    vector[elementNum].set(minimumNonZeroValue);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
new file mode 100644
index 0000000..4a7811d
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable double precision floating point column vector.
+ * This class will be used for operations on all floating point types (float, double)
+ * and as such will use a 64-bit double value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class DoubleColumnVector extends ColumnVector {
+  public double[] vector;
+  public static final double NULL_VALUE = Double.NaN;
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public DoubleColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public DoubleColumnVector(int len) {
+    super(len);
+    vector = new double[len];
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Fill the column vector with the provided value
+  public void fill(double value) {
+    noNulls = true;
+    isRepeating = true;
+    vector[0] = value;
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      double repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum];
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row]);
+    } else {
+      buffer.append("null");
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
new file mode 100644
index 0000000..5702584
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable int column vector.
+ * This class will be used for operations on all integer types (tinyint, smallint, int, bigint)
+ * and as such will use a 64-bit long value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller int types will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class LongColumnVector extends ColumnVector {
+  public long[] vector;
+  public static final long NULL_VALUE = 1;
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public LongColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public LongColumnVector(int len) {
+    super(len);
+    vector = new long[len];
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];  // automatic conversion to double is done here
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      for(int i = 0; i < size; ++i) {
+        output.vector[i] = vector[i];
+      }
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Fill the column vector with the provided value
+  public void fill(long value) {
+    noNulls = true;
+    isRepeating = true;
+    vector[0] = value;
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      long repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum];
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row]);
+    } else {
+      buffer.append("null");
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
new file mode 100644
index 0000000..7c18da6
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A VectorizedRowBatch is a set of rows, organized with each column
+ * as a vector. It is the unit of query execution, organized to minimize
+ * the cost per row and achieve high instructions-per-cycle.
+ * The major fields are public by design to allow fast and convenient
+ * access by the vectorized query execution code.
+ */
+public class VectorizedRowBatch implements Writable {
+  public int numCols;           // number of columns
+  public ColumnVector[] cols;   // a vector for each column
+  public int size;              // number of rows that qualify (i.e. haven't been filtered out)
+  public int[] selected;        // array of positions of selected values
+  public int[] projectedColumns;
+  public int projectionSize;
+
+  /*
+   * If no filtering has been applied yet, selectedInUse is false,
+   * meaning that all rows qualify. If it is true, then the selected[] array
+   * records the offsets of qualifying rows.
+   */
+  public boolean selectedInUse;
+
+  // If this is true, then there is no data in the batch -- we have hit the end of input.
+  public boolean endOfFile;
+
+  /*
+   * This number is carefully chosen to minimize overhead and typically allows
+   * one VectorizedRowBatch to fit in cache.
+   */
+  public static final int DEFAULT_SIZE = 1024;
+
+  /**
+   * Return a batch with the specified number of columns.
+   * This is the standard constructor -- all batches should be the same size
+   *
+   * @param numCols the number of columns to include in the batch
+   */
+  public VectorizedRowBatch(int numCols) {
+    this(numCols, DEFAULT_SIZE);
+  }
+
+  /**
+   * Return a batch with the specified number of columns and rows.
+   * Only call this constructor directly for testing purposes.
+   * Batch size should normally always be DEFAULT_SIZE.
+   *
+   * @param numCols the number of columns to include in the batch
+   * @param size  the number of rows to include in the batch
+   */
+  public VectorizedRowBatch(int numCols, int size) {
+    this.numCols = numCols;
+    this.size = size;
+    selected = new int[size];
+    selectedInUse = false;
+    this.cols = new ColumnVector[numCols];
+    projectedColumns = new int[numCols];
+
+    // Initially all columns are projected and in the same order
+    projectionSize = numCols;
+    for (int i = 0; i < numCols; i++) {
+      projectedColumns[i] = i;
+    }
+  }
+
+  /**
+   * Returns the maximum size of the batch (number of rows it can hold)
+   */
+  public int getMaxSize() {
+      return selected.length;
+  }
+
+  /**
+   * Return count of qualifying rows.
+   *
+   * @return number of rows that have not been filtered out
+   */
+  public long count() {
+    return size;
+  }
+
+  private static String toUTF8(Object o) {
+    if(o == null || o instanceof NullWritable) {
+      return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
+    }
+    return o.toString();
+  }
+
+  @Override
+  public String toString() {
+    if (size == 0) {
+      return "";
+    }
+    StringBuilder b = new StringBuilder();
+    if (this.selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = selected[j];
+        b.append('[');
+        for (int k = 0; k < projectionSize; k++) {
+          int projIndex = projectedColumns[k];
+          ColumnVector cv = cols[projIndex];
+          if (k > 0) {
+            b.append(", ");
+          }
+          cv.stringifyValue(b, i);
+        }
+        b.append(']');
+        if (j < size - 1) {
+          b.append('\n');
+        }
+      }
+    } else {
+      for (int i = 0; i < size; i++) {
+        b.append('[');
+        for (int k = 0; k < projectionSize; k++) {
+          int projIndex = projectedColumns[k];
+          ColumnVector cv = cols[projIndex];
+          if (k > 0) {
+            b.append(", ");
+          }
+          cv.stringifyValue(b, i);
+        }
+        b.append(']');
+        if (i < size - 1) {
+          b.append('\n');
+        }
+      }
+    }
+    return b.toString();
+  }
+
+  @Override
+  public void readFields(DataInput arg0) throws IOException {
+    throw new UnsupportedOperationException("Do you really need me?");
+  }
+
+  @Override
+  public void write(DataOutput arg0) throws IOException {
+    throw new UnsupportedOperationException("Don't call me");
+  }
+
+  /**
+   * Resets the row batch to default state
+   *  - sets selectedInUse to false
+   *  - sets size to 0
+   *  - sets endOfFile to false
+   *  - resets each column
+   *  - inits each column
+   */
+  public void reset() {
+    selectedInUse = false;
+    size = 0;
+    endOfFile = false;
+    for (ColumnVector vc : cols) {
+      if (vc != null) {
+        vc.reset();
+        vc.init();
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
new file mode 100644
index 0000000..577d95d
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * The inner representation of the SearchArgument. Most users should not
+ * need this class; it is only for file formats that need to translate
+ * the SearchArgument into an internal form.
+ */
+public class ExpressionTree {
+  public enum Operator {OR, AND, NOT, LEAF, CONSTANT}
+  private final Operator operator;
+  private final List<ExpressionTree> children;
+  private final int leaf;
+  private final SearchArgument.TruthValue constant;
+
+  ExpressionTree() {
+    operator = null;
+    children = null;
+    leaf = 0;
+    constant = null;
+  }
+
+  ExpressionTree(Operator op, ExpressionTree... kids) {
+    operator = op;
+    children = new ArrayList<ExpressionTree>();
+    leaf = -1;
+    this.constant = null;
+    Collections.addAll(children, kids);
+  }
+
+  ExpressionTree(int leaf) {
+    operator = Operator.LEAF;
+    children = null;
+    this.leaf = leaf;
+    this.constant = null;
+  }
+
+  ExpressionTree(SearchArgument.TruthValue constant) {
+    operator = Operator.CONSTANT;
+    children = null;
+    this.leaf = -1;
+    this.constant = constant;
+  }
+
+  ExpressionTree(ExpressionTree other) {
+    this.operator = other.operator;
+    if (other.children == null) {
+      this.children = null;
+    } else {
+      this.children = new ArrayList<ExpressionTree>();
+      for(ExpressionTree child: other.children) {
+        children.add(new ExpressionTree(child));
+      }
+    }
+    this.leaf = other.leaf;
+    this.constant = other.constant;
+  }
+
+  public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves
+                                            ) {
+    SearchArgument.TruthValue result = null;
+    switch (operator) {
+      case OR:
+        for(ExpressionTree child: children) {
+          result = child.evaluate(leaves).or(result);
+        }
+        return result;
+      case AND:
+        for(ExpressionTree child: children) {
+          result = child.evaluate(leaves).and(result);
+        }
+        return result;
+      case NOT:
+        return children.get(0).evaluate(leaves).not();
+      case LEAF:
+        return leaves[leaf];
+      case CONSTANT:
+        return constant;
+      default:
+        throw new IllegalStateException("Unknown operator: " + operator);
+    }
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+    switch (operator) {
+      case OR:
+        buffer.append("(or");
+        for(ExpressionTree child: children) {
+          buffer.append(' ');
+          buffer.append(child.toString());
+        }
+        buffer.append(')');
+        break;
+      case AND:
+        buffer.append("(and");
+        for(ExpressionTree child: children) {
+          buffer.append(' ');
+          buffer.append(child.toString());
+        }
+        buffer.append(')');
+        break;
+      case NOT:
+        buffer.append("(not ");
+        buffer.append(children.get(0));
+        buffer.append(')');
+        break;
+      case LEAF:
+        buffer.append("leaf-");
+        buffer.append(leaf);
+        break;
+      case CONSTANT:
+        buffer.append(constant);
+        break;
+    }
+    return buffer.toString();
+  }
+
+  public Operator getOperator() {
+    return operator;
+  }
+
+  public List<ExpressionTree> getChildren() {
+    return children;
+  }
+
+  public SearchArgument.TruthValue getConstant() {
+    return constant;
+  }
+
+  public int getLeaf() {
+    return leaf;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
new file mode 100644
index 0000000..3a92565
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.List;
+
+/**
+ * The primitive predicates that form a SearchArgument.
+ */
+public interface PredicateLeaf {
+
+  /**
+   * The possible operators for predicates. To get the opposites, construct
+   * an expression with a not operator.
+   */
+  public static enum Operator {
+    EQUALS,
+    NULL_SAFE_EQUALS,
+    LESS_THAN,
+    LESS_THAN_EQUALS,
+    IN,
+    BETWEEN,
+    IS_NULL
+  }
+
+  /**
+   * The possible types for sargs.
+   */
+  public static enum Type {
+    INTEGER(Integer.class), // all of the integer types except long
+    LONG(Long.class),
+    FLOAT(Double.class),   // float and double
+    STRING(String.class),  // string, char, varchar
+    DATE(Date.class),
+    DECIMAL(HiveDecimalWritable.class),
+    TIMESTAMP(Timestamp.class),
+    BOOLEAN(Boolean.class);
+
+    private final Class cls;
+    Type(Class cls) {
+      this.cls = cls;
+    }
+
+    /**
+     * For all SARG leaves, the values must be the matching class.
+     * @return the value class
+     */
+    public Class getValueClass() {
+      return cls;
+    }
+  }
+
+  /**
+   * Get the operator for the leaf.
+   */
+  public Operator getOperator();
+
+  /**
+   * Get the type of the column and literal by the file format.
+   */
+  public Type getType();
+
+  /**
+   * Get the simple column name.
+   * @return the column name
+   */
+  public String getColumnName();
+
+  /**
+   * Get the literal half of the predicate leaf. Adapt the original type for what orc needs
+   *
+   * @return an Integer, Long, Double, or String
+   */
+  public Object getLiteral();
+
+  /**
+   * For operators with multiple literals (IN and BETWEEN), get the literals.
+   *
+   * @return the list of literals (Integers, Longs, Doubles, or Strings)
+   *
+   */
+  public List<Object> getLiteralList();
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
new file mode 100644
index 0000000..d70b3b0
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.util.List;
+
+/**
+ * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable">
+ *   SearchArgument</a>, which are the subset of predicates
+ * that can be pushed down to the RecordReader. Each SearchArgument consists
+ * of a series of SearchClauses that must each be true for the row to be
+ * accepted by the filter.
+ *
+ * This requires that the filter be normalized into conjunctive normal form
+ * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>).
+ */
+public interface SearchArgument {
+
+  /**
+   * The potential result sets of logical operations.
+   */
+  public static enum TruthValue {
+    YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL;
+
+    /**
+     * Compute logical or between the two values.
+     * @param right the other argument or null
+     * @return the result
+     */
+    public TruthValue or(TruthValue right) {
+      if (right == null || right == this) {
+        return this;
+      }
+      if (right == YES || this == YES) {
+        return YES;
+      }
+      if (right == YES_NULL || this == YES_NULL) {
+        return YES_NULL;
+      }
+      if (right == NO) {
+        return this;
+      }
+      if (this == NO) {
+        return right;
+      }
+      if (this == NULL) {
+        if (right == NO_NULL) {
+          return NULL;
+        } else {
+          return YES_NULL;
+        }
+      }
+      if (right == NULL) {
+        if (this == NO_NULL) {
+          return NULL;
+        } else {
+          return YES_NULL;
+        }
+      }
+      return YES_NO_NULL;
+    }
+
+    /**
+     * Compute logical AND between the two values.
+     * @param right the other argument or null
+     * @return the result
+     */
+    public TruthValue and(TruthValue right) {
+      if (right == null || right == this) {
+        return this;
+      }
+      if (right == NO || this == NO) {
+        return NO;
+      }
+      if (right == NO_NULL || this == NO_NULL) {
+        return NO_NULL;
+      }
+      if (right == YES) {
+        return this;
+      }
+      if (this == YES) {
+        return right;
+      }
+      if (this == NULL) {
+        if (right == YES_NULL) {
+          return NULL;
+        } else {
+          return NO_NULL;
+        }
+      }
+      if (right == NULL) {
+        if (this == YES_NULL) {
+          return NULL;
+        } else {
+          return NO_NULL;
+        }
+      }
+      return YES_NO_NULL;
+    }
+
+    public TruthValue not() {
+      switch (this) {
+        case NO:
+          return YES;
+        case YES:
+          return NO;
+        case NULL:
+        case YES_NO:
+        case YES_NO_NULL:
+          return this;
+        case NO_NULL:
+          return YES_NULL;
+        case YES_NULL:
+          return NO_NULL;
+        default:
+          throw new IllegalArgumentException("Unknown value: " + this);
+      }
+    }
+
+    /**
+     * Does the RecordReader need to include this set of records?
+     * @return true unless none of the rows qualify
+     */
+    public boolean isNeeded() {
+      switch (this) {
+        case NO:
+        case NULL:
+        case NO_NULL:
+          return false;
+        default:
+          return true;
+      }
+    }
+  }
+
+  /**
+   * Get the leaf predicates that are required to evaluate the predicate. The
+   * list will have the duplicates removed.
+   * @return the list of leaf predicates
+   */
+  public List<PredicateLeaf> getLeaves();
+
+  /**
+   * Get the expression tree. This should only be needed for file formats that
+   * need to translate the expression to an internal form.
+   */
+  public ExpressionTree getExpression();
+
+  /**
+   * Evaluate the entire predicate based on the values for the leaf predicates.
+   * @param leaves the value of each leaf predicate
+   * @return the value of the entire predicate
+   */
+  public TruthValue evaluate(TruthValue[] leaves);
+
+  /**
+   * A builder object for contexts outside of Hive where it isn't easy to
+   * get an ExprNodeDesc. The user must call startOr, startAnd, or startNot
+   * before adding any leaves.
+   */
+  public interface Builder {
+
+    /**
+     * Start building an or operation and push it on the stack.
+     * @return this
+     */
+    public Builder startOr();
+
+    /**
+     * Start building an and operation and push it on the stack.
+     * @return this
+     */
+    public Builder startAnd();
+
+    /**
+     * Start building a not operation and push it on the stack.
+     * @return this
+     */
+    public Builder startNot();
+
+    /**
+     * Finish the current operation and pop it off of the stack. Each start
+     * call must have a matching end.
+     * @return this
+     */
+    public Builder end();
+
+    /**
+     * Add a less than leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder lessThan(String column, PredicateLeaf.Type type,
+                            Object literal);
+
+    /**
+     * Add a less than equals leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder lessThanEquals(String column, PredicateLeaf.Type type,
+                                  Object literal);
+
+    /**
+     * Add an equals leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder equals(String column, PredicateLeaf.Type type,
+                          Object literal);
+
+    /**
+     * Add a null safe equals leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
+                                  Object literal);
+
+    /**
+     * Add an in leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literals
+     * @return this
+     */
+    public Builder in(String column, PredicateLeaf.Type type,
+                      Object... literal);
+
+    /**
+     * Add an is null leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @return this
+     */
+    public Builder isNull(String column, PredicateLeaf.Type type);
+
+    /**
+     * Add a between leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param lower the lower bound literal
+     * @param upper the upper bound literal
+     * @return this
+     */
+    public Builder between(String column, PredicateLeaf.Type type,
+                           Object lower, Object upper);
+
+    /**
+     * Add a truth value to the expression.
+     * @param truth the truth value to add
+     * @return this
+     */
+    public Builder literal(TruthValue truth);
+
+    /**
+     * Build and return the SearchArgument that has been defined. All of the
+     * starts must have been ended before this call.
+     * @return the new SearchArgument
+     */
+    public SearchArgument build();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
new file mode 100644
index 0000000..0778935
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+/**
+ * A factory for creating SearchArguments.
+ */
+public class SearchArgumentFactory {
+  public static SearchArgument.Builder newBuilder() {
+    return new SearchArgumentImpl.BuilderImpl();
+  }
+}


Mime
View raw message