hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1477796 [1/2] - in /hive/branches/vectorization: ./ common/ common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/ hcatalog/ jdbc/src/java/org/apache/hive/jdbc/ jdbc/src/test/org/apache/hive/jdbc/ ql/src/java/org/apache/hadoop/hive...
Date Tue, 30 Apr 2013 19:45:40 GMT
Author: hashutosh
Date: Tue Apr 30 19:45:38 2013
New Revision: 1477796

URL: http://svn.apache.org/r1477796
Log:
Merged in latest trunk

Added:
    hive/branches/vectorization/data/files/array_table.txt
      - copied unchanged from r1477793, hive/trunk/data/files/array_table.txt
    hive/branches/vectorization/data/files/map_table.txt
      - copied unchanged from r1477793, hive/trunk/data/files/map_table.txt
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
      - copied unchanged from r1477793, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
    hive/branches/vectorization/ql/src/test/queries/clientpositive/insert_overwrite_local_directory_1.q
      - copied unchanged from r1477793, hive/trunk/ql/src/test/queries/clientpositive/insert_overwrite_local_directory_1.q
    hive/branches/vectorization/ql/src/test/queries/clientpositive/ptf_register_tblfn.q
      - copied unchanged from r1477793, hive/trunk/ql/src/test/queries/clientpositive/ptf_register_tblfn.q
    hive/branches/vectorization/ql/src/test/results/clientpositive/insert_overwrite_local_directory_1.q.out
      - copied unchanged from r1477793, hive/trunk/ql/src/test/results/clientpositive/insert_overwrite_local_directory_1.q.out
    hive/branches/vectorization/ql/src/test/results/clientpositive/ptf_register_tblfn.q.out
      - copied unchanged from r1477793, hive/trunk/ql/src/test/results/clientpositive/ptf_register_tblfn.q.out
Removed:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFFunctionInfo.java
Modified:
    hive/branches/vectorization/   (props changed)
    hive/branches/vectorization/common/build.xml
    hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/branches/vectorization/conf/hive-default.xml.template
    hive/branches/vectorization/hcatalog/build.xml
    hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java
    hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
    hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q
    hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q
    hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q
    hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q
    hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out
    hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out
    hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out
    hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out
    hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out
    hive/branches/vectorization/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java

Propchange: hive/branches/vectorization/
------------------------------------------------------------------------------
  Merged /hive/trunk:r1476329-1477793

Modified: hive/branches/vectorization/common/build.xml
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/common/build.xml?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/common/build.xml (original)
+++ hive/branches/vectorization/common/build.xml Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ to call at top-level: ant deploy-contrib
 <project name="common" default="jar">
 
   <property name="src.dir"  location="${basedir}/src/java"/>
+  <property name="src.gen.dir"  location="${basedir}/src/gen"/>
   <import file="../build-common.xml"/>
 
   <target name="compile" depends="init, setup, ivy-retrieve">
@@ -36,7 +37,7 @@ to call at top-level: ant deploy-contrib
     </exec>
     <javac
      encoding="${build.encoding}"
-     srcdir="${src.dir}"
+     srcdir="${src.dir}:${src.gen.dir}"
      includes="**/*.java"
      destdir="${build.classes}"
      debug="${javac.debug}"

Modified: hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/vectorization/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Apr 30 19:45:38 2013
@@ -415,8 +415,6 @@ public class HiveConf extends Configurat
     HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
     HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
     HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),
-    HIVEMAPJOINROWSIZE("hive.mapjoin.size.key", 10000),
-    HIVEMAPJOINCACHEROWS("hive.mapjoin.cache.numrows", 25000),
     HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
     HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5),
     HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3),

Modified: hive/branches/vectorization/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/conf/hive-default.xml.template?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/conf/hive-default.xml.template (original)
+++ hive/branches/vectorization/conf/hive-default.xml.template Tue Apr 30 19:45:38 2013
@@ -595,12 +595,6 @@
 </property>
 
 <property>
-  <name>hive.mapjoin.cache.numrows</name>
-  <value>25000</value>
-  <description>How many rows should be cached by jdbm for map join. </description>
-</property>
-
-<property>
   <name>hive.optimize.skewjoin</name>
   <value>false</value>
   <description>Whether to enable skew join optimization.

Modified: hive/branches/vectorization/hcatalog/build.xml
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/hcatalog/build.xml?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/hcatalog/build.xml (original)
+++ hive/branches/vectorization/hcatalog/build.xml Tue Apr 30 19:45:38 2013
@@ -91,18 +91,23 @@
     <target name="gen-test" description="Generate tests, a no-op for hcat"/>
 
     <target name="test" depends="jar" description="run unit tests">
-        <ant target="test" dir="core" inheritAll="false"/>
-        <ant target="test" dir="hcatalog-pig-adapter" inheritAll="false"/>
-        <ant target="test" dir="server-extensions" inheritAll="false"/>
-        <ant target="test" dir="webhcat/svr" inheritAll="false"/>
-        <ant target="test" dir="webhcat/java-client" inheritAll="false"/>
-        <ant target="test" dir="storage-handlers/hbase" inheritAll="false"/>
-        <!-- One checkstyle run for the whole repo. Runs after junit tests
-        to piggyback on resolved jars. -->
-        <path id="checkstyle.class.path">
-          <fileset dir="core/build/lib/test"/>
-        </path>
-        <antcall target="checkstyle" inheritRefs="true"/>
+        <!-- Placed in a parallel structure so that the tests keep going
+             even if some fail.  Otherwise a failure in one of the earlier ant
+             calls terminates the target and the rest do not run.  -->
+        <parallel threadCount="1">
+            <ant target="test" dir="core" inheritAll="false"/>
+            <ant target="test" dir="hcatalog-pig-adapter" inheritAll="false"/>
+            <ant target="test" dir="server-extensions" inheritAll="false"/>
+            <ant target="test" dir="webhcat/svr" inheritAll="false"/>
+            <ant target="test" dir="webhcat/java-client" inheritAll="false"/>
+            <ant target="test" dir="storage-handlers/hbase" inheritAll="false"/>
+            <!-- One checkstyle run for the whole repo. Runs after junit tests
+            to piggyback on resolved jars. -->
+            <path id="checkstyle.class.path">
+                <fileset dir="core/build/lib/test"/>
+            </path>
+            <antcall target="checkstyle" inheritRefs="true"/>
+        </parallel>
     </target>
 
     <target name="compile-test" depends="jar" description="compile unit tests">

Modified: hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java (original)
+++ hive/branches/vectorization/jdbc/src/java/org/apache/hive/jdbc/HiveDatabaseMetaData.java Tue Apr 30 19:45:38 2013
@@ -28,12 +28,16 @@ import java.util.Comparator;
 import java.util.jar.Attributes;
 
 import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hive.service.cli.GetInfoType;
+import org.apache.hive.service.cli.thrift.TCLIService;
 import org.apache.hive.service.cli.thrift.TGetCatalogsReq;
 import org.apache.hive.service.cli.thrift.TGetCatalogsResp;
 import org.apache.hive.service.cli.thrift.TGetColumnsReq;
 import org.apache.hive.service.cli.thrift.TGetColumnsResp;
 import org.apache.hive.service.cli.thrift.TGetFunctionsReq;
 import org.apache.hive.service.cli.thrift.TGetFunctionsResp;
+import org.apache.hive.service.cli.thrift.TGetInfoReq;
+import org.apache.hive.service.cli.thrift.TGetInfoResp;
 import org.apache.hive.service.cli.thrift.TGetSchemasReq;
 import org.apache.hive.service.cli.thrift.TGetSchemasResp;
 import org.apache.hive.service.cli.thrift.TGetTableTypesReq;
@@ -42,7 +46,6 @@ import org.apache.hive.service.cli.thrif
 import org.apache.hive.service.cli.thrift.TGetTablesResp;
 import org.apache.hive.service.cli.thrift.TGetTypeInfoReq;
 import org.apache.hive.service.cli.thrift.TGetTypeInfoResp;
-import org.apache.hive.service.cli.thrift.TCLIService;
 import org.apache.hive.service.cli.thrift.TSessionHandle;
 import org.apache.thrift.TException;
 
@@ -249,8 +252,17 @@ public class HiveDatabaseMetaData implem
   }
 
   public String getDatabaseProductVersion() throws SQLException {
-    // TODO: Fetch this from the server side
-    return "0.10.0";
+
+    TGetInfoReq req = new TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_VER.toTGetInfoType());
+    TGetInfoResp resp;
+    try {
+      resp = client.GetInfo(req);
+    } catch (TException e) {
+      throw new SQLException(e.getMessage(), "08S01", e);
+    }
+    Utils.verifySuccess(resp.getStatus());
+
+    return resp.getInfoValue().getStringValue();
   }
 
   public int getDefaultTransactionIsolation() throws SQLException {

Modified: hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java (original)
+++ hive/branches/vectorization/jdbc/src/test/org/apache/hive/jdbc/TestJdbcDriver2.java Tue Apr 30 19:45:38 2013
@@ -35,11 +35,13 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 import junit.framework.TestCase;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.common.util.HiveVersionInfo;
 
 /**
  * TestJdbcDriver2
@@ -833,7 +835,11 @@ public class TestJdbcDriver2 extends Tes
     DatabaseMetaData meta = con.getMetaData();
 
     assertEquals("Hive", meta.getDatabaseProductName());
-    assertEquals("0.10.0", meta.getDatabaseProductVersion());
+    assertEquals(HiveVersionInfo.getVersion(), meta.getDatabaseProductVersion());
+    assertEquals(System.getProperty("hive.version"), meta.getDatabaseProductVersion());
+    assertTrue("verifying hive version pattern. got " + meta.getDatabaseProductVersion(),
+        Pattern.matches("\\d+\\.\\d+\\.\\d+.*", meta.getDatabaseProductVersion()) );
+
     assertEquals(DatabaseMetaData.sqlStateSQL99, meta.getSQLStateType());
     assertFalse(meta.supportsCatalogsInTableDefinitions());
     assertFalse(meta.supportsSchemasInTableDefinitions());

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java Tue Apr 30 19:45:38 2013
@@ -53,7 +53,6 @@ public abstract class AbstractMapJoinOpe
   protected transient List<ObjectInspector>[] joinKeysStandardObjectInspectors;
 
   protected transient byte posBigTable = -1; // one of the tables that is not in memory
-  transient int mapJoinRowsKey; // rows for a given key
 
   protected transient RowContainer<ArrayList<Object>> emptyList = null;
 
@@ -104,9 +103,6 @@ public abstract class AbstractMapJoinOpe
         !hasFilter(posBigTable), reporter);
     storage[posBigTable] = bigPosRC;
 
-    mapJoinRowsKey = HiveConf.getIntVar(hconf,
-        HiveConf.ConfVars.HIVEMAPJOINROWSIZE);
-
     List<? extends StructField> structFields = ((StructObjectInspector) outputObjInspector)
         .getAllStructFieldRefs();
     if (conf.getOutputColumnNames().size() < structFields.size()) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java Tue Apr 30 19:45:38 2013
@@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver;
+import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction;
 
 /**
  * FunctionInfo.
@@ -32,6 +34,8 @@ public class FunctionInfo {
 
   private final boolean isNative;
 
+  private final boolean isInternalTableFunction;
+
   private final String displayName;
 
   private GenericUDF genericUDF;
@@ -40,11 +44,14 @@ public class FunctionInfo {
 
   private GenericUDAFResolver genericUDAFResolver;
 
+  private Class<? extends TableFunctionResolver>  tableFunctionResolver;
+
   public FunctionInfo(boolean isNative, String displayName,
       GenericUDF genericUDF) {
     this.isNative = isNative;
     this.displayName = displayName;
     this.genericUDF = genericUDF;
+    this.isInternalTableFunction = false;
   }
 
   public FunctionInfo(boolean isNative, String displayName,
@@ -52,6 +59,7 @@ public class FunctionInfo {
     this.isNative = isNative;
     this.displayName = displayName;
     this.genericUDAFResolver = genericUDAFResolver;
+    this.isInternalTableFunction = false;
   }
 
   public FunctionInfo(boolean isNative, String displayName,
@@ -59,6 +67,16 @@ public class FunctionInfo {
     this.isNative = isNative;
     this.displayName = displayName;
     this.genericUDTF = genericUDTF;
+    this.isInternalTableFunction = false;
+  }
+
+  public FunctionInfo(String displayName, Class<? extends TableFunctionResolver> tFnCls)
+  {
+    this.displayName = displayName;
+    this.tableFunctionResolver = tFnCls;
+    PartitionTableFunctionDescription def = tableFunctionResolver.getAnnotation(PartitionTableFunctionDescription.class);
+    this.isNative = (def == null) ? false : def.isInternal();
+    this.isInternalTableFunction = isNative;
   }
 
   /**
@@ -90,6 +108,8 @@ public class FunctionInfo {
     return genericUDAFResolver;
   }
 
+
+
   /**
    * Get the Class of the UDF.
    */
@@ -109,6 +129,9 @@ public class FunctionInfo {
     } else if (isGenericUDTF()) {
       return genericUDTF.getClass();
     }
+    if(isTableFunction()) {
+      return this.tableFunctionResolver;
+    }
     return null;
   }
 
@@ -131,6 +154,14 @@ public class FunctionInfo {
   }
 
   /**
+   * Internal table functions cannot be used in the language.
+   * {@link WindowingTableFunction}
+   */
+  public boolean isInternalTableFunction() {
+    return isInternalTableFunction;
+  }
+
+  /**
    * @return TRUE if the function is a GenericUDF
    */
   public boolean isGenericUDF() {
@@ -150,4 +181,11 @@ public class FunctionInfo {
   public boolean isGenericUDTF() {
     return null != genericUDTF;
   }
+
+  /**
+   * @return TRUE if the function is a Table Function
+   */
+  public boolean isTableFunction() {
+    return null != tableFunctionResolver;
+  }
 }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Tue Apr 30 19:45:38 2013
@@ -182,7 +182,6 @@ public final class FunctionRegistry {
   public static final String NOOP_TABLE_FUNCTION = "noop";
   public static final String NOOP_MAP_TABLE_FUNCTION = "noopwithmap";
 
-  static Map<String, PTFFunctionInfo> tableFunctions = Collections.synchronizedMap(new LinkedHashMap<String, PTFFunctionInfo>());
   static Map<String, WindowFunctionInfo> windowFunctions = Collections.synchronizedMap(new LinkedHashMap<String, WindowFunctionInfo>());
 
   /*
@@ -1294,6 +1293,9 @@ public final class FunctionRegistry {
       FunctionRegistry.registerTemporaryGenericUDAF(
         functionName, (GenericUDAFResolver)
         ReflectionUtils.newInstance(udfClass, null));
+    } else if(TableFunctionResolver.class.isAssignableFrom(udfClass)) {
+      FunctionRegistry.registerTableFunction(
+        functionName, (Class<? extends TableFunctionResolver>)udfClass);
     } else {
       return false;
     }
@@ -1406,14 +1408,17 @@ public final class FunctionRegistry {
 
   public static boolean isTableFunction(String name)
   {
-    PTFFunctionInfo tFInfo = tableFunctions.get(name.toLowerCase());
-     return tFInfo != null && !tFInfo.isInternal();
+    FunctionInfo tFInfo = mFunctions.get(name.toLowerCase());
+    return tFInfo != null && !tFInfo.isInternalTableFunction() && tFInfo.isTableFunction();
   }
 
   public static TableFunctionResolver getTableFunctionResolver(String name)
   {
-    PTFFunctionInfo tfInfo = tableFunctions.get(name.toLowerCase());
-    return (TableFunctionResolver) ReflectionUtils.newInstance(tfInfo.getFunctionResolver(), null);
+    FunctionInfo tfInfo = mFunctions.get(name.toLowerCase());
+    if(tfInfo.isTableFunction()) {
+      return (TableFunctionResolver) ReflectionUtils.newInstance(tfInfo.getFunctionClass(), null);
+    }
+    return null;
   }
 
   public static TableFunctionResolver getWindowingTableFunction()
@@ -1428,8 +1433,8 @@ public final class FunctionRegistry {
 
   public static void registerTableFunction(String name, Class<? extends TableFunctionResolver> tFnCls)
   {
-    PTFFunctionInfo tInfo = new PTFFunctionInfo(name, tFnCls);
-    tableFunctions.put(name.toLowerCase(), tInfo);
+    FunctionInfo tInfo = new FunctionInfo(name, tFnCls);
+    mFunctions.put(name.toLowerCase(), tInfo);
   }
 
 }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Tue Apr 30 19:45:38 2013
@@ -151,35 +151,13 @@ public class GroupByOperator extends Ope
   private List<FastBitSet> groupingSetsBitSet;
   transient private List<Object> newKeysGroupingSets;
 
-  /**
-   * This is used to store the position and field names for variable length
-   * fields.
-   **/
-  class varLenFields {
-    int aggrPos;
-    List<Field> fields;
-
-    varLenFields(int aggrPos, List<Field> fields) {
-      this.aggrPos = aggrPos;
-      this.fields = fields;
-    }
-
-    int getAggrPos() {
-      return aggrPos;
-    }
-
-    List<Field> getFields() {
-      return fields;
-    }
-  };
-
   // for these positions, some variable primitive type (String) is used, so size
   // cannot be estimated. sample it at runtime.
   transient List<Integer> keyPositionsSize;
 
   // for these positions, some variable primitive type (String) is used for the
   // aggregation classes
-  transient List<varLenFields> aggrPositions;
+  transient List<Field>[] aggrPositions;
 
   transient int fixedRowSize;
   transient long maxHashTblMemory;
@@ -383,7 +361,7 @@ public class GroupByOperator extends Ope
       aggregations = newAggregations();
       hashAggr = true;
       keyPositionsSize = new ArrayList<Integer>();
-      aggrPositions = new ArrayList<varLenFields>();
+      aggrPositions = new List[aggregations.length];
       groupbyMapAggrInterval = HiveConf.getIntVar(hconf,
           HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
 
@@ -523,21 +501,10 @@ public class GroupByOperator extends Ope
     }
 
     if (c.isInstance(new String()) || c.isInstance(new ByteArrayRef())) {
-      int idx = 0;
-      varLenFields v = null;
-      for (idx = 0; idx < aggrPositions.size(); idx++) {
-        v = aggrPositions.get(idx);
-        if (v.getAggrPos() == pos) {
-          break;
-        }
+      if (aggrPositions[pos] == null) {
+        aggrPositions[pos] = new ArrayList<Field>();
       }
-
-      if (idx == aggrPositions.size()) {
-        v = new varLenFields(pos, new ArrayList<Field>());
-        aggrPositions.add(v);
-      }
-
-      v.getFields().add(f);
+      aggrPositions[pos].add(f);
       return javaObjectOverHead;
     }
 
@@ -582,9 +549,11 @@ public class GroupByOperator extends Ope
     for (int i = 0; i < aggregationEvaluators.length; i++) {
 
       fixedRowSize += javaObjectOverHead;
-      Class<? extends AggregationBuffer> agg = aggregationEvaluators[i]
-          .getNewAggregationBuffer().getClass();
-      Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg);
+      AggregationBuffer agg = aggregationEvaluators[i].getNewAggregationBuffer();
+      if (GenericUDAFEvaluator.isEstimable(agg)) {
+        continue;
+      }
+      Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg.getClass());
       for (Field f : fArr) {
         fixedRowSize += getSize(i, f.getType(), f);
       }
@@ -968,29 +937,15 @@ public class GroupByOperator extends Ope
         }
       }
 
-      AggregationBuffer[] aggs = null;
-      if (aggrPositions.size() > 0) {
-        KeyWrapper newKeyProber = newKeys.copyKey();
-        aggs = hashAggregations.get(newKeyProber);
-      }
-
-      for (varLenFields v : aggrPositions) {
-        int aggrPos = v.getAggrPos();
-        List<Field> fieldsVarLen = v.getFields();
-        AggregationBuffer agg = aggs[aggrPos];
-
-        try {
-          for (Field f : fieldsVarLen) {
-            Object o = f.get(agg);
-            if (o instanceof String){
-              totalVariableSize += ((String)o).length();
-            }
-            else if (o instanceof ByteArrayRef){
-              totalVariableSize += ((ByteArrayRef)o).getData().length;
-            }
-          }
-        } catch (IllegalAccessException e) {
-          assert false;
+      AggregationBuffer[] aggs = hashAggregations.get(newKeys);
+      for (int i = 0; i < aggs.length; i++) {
+        AggregationBuffer agg = aggs[i];
+        if (GenericUDAFEvaluator.isEstimable(agg)) {
+          totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer)agg).estimate();
+          continue;
+        }
+        if (aggrPositions[i] != null) {
+          totalVariableSize += estimateSize(agg, aggrPositions[i]);
         }
       }
 
@@ -1010,6 +965,24 @@ public class GroupByOperator extends Ope
     return false;
   }
 
+  private int estimateSize(AggregationBuffer agg, List<Field> fields) {
+    int length = 0;
+    for (Field f : fields) {
+      try {
+        Object o = f.get(agg);
+        if (o instanceof String){
+          length += ((String)o).length();
+        }
+        else if (o instanceof ByteArrayRef){
+          length += ((ByteArrayRef)o).getData().length;
+        }
+      } catch (Exception e) {
+        // continue.. null out the field?
+      }
+    }
+    return length;
+  }
+
   private void flush(boolean complete) throws HiveException {
 
     countAfterReport = 0;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Apr 30 19:45:38 2013
@@ -70,7 +70,6 @@ public class HashTableSinkOperator exten
   protected transient List<ObjectInspector>[] joinKeysStandardObjectInspectors;
 
   protected transient int posBigTableAlias = -1; // one of the tables that is not in memory
-  transient int mapJoinRowsKey; // rows for a given key
 
   protected transient RowContainer<ArrayList<Object>> emptyList = null;
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java Tue Apr 30 19:45:38 2013
@@ -1385,21 +1385,31 @@ public class RCFile {
 
       try {
         seek(position + 4); // skip escape
-        in.readFully(syncCheck);
-        int syncLen = sync.length;
-        for (int i = 0; in.getPos() < end; i++) {
-          int j = 0;
-          for (; j < syncLen; j++) {
-            if (sync[j] != syncCheck[(i + j) % syncLen]) {
-              break;
+
+        int prefix = sync.length;
+        int n = conf.getInt("io.bytes.per.checksum", 512);
+        byte[] buffer = new byte[prefix+n];
+        n = (int)Math.min(n, end - in.getPos());
+        /* fill array with a pattern that will never match sync */
+        Arrays.fill(buffer, (byte)(~sync[0])); 
+        while(n > 0 && (in.getPos() + n) <= end) {
+          position = in.getPos();
+          in.readFully(buffer, prefix, n);
+          /* the buffer has n+sync bytes */
+          for(int i = 0; i < n; i++) {
+            int j;
+            for(j = 0; j < sync.length && sync[j] == buffer[i+j]; j++) {
+              /* nothing */
+            }
+            if(j == sync.length) {
+              /* simplified from (position + (i - prefix) + sync.length) - SYNC_SIZE */
+              in.seek(position + i - SYNC_SIZE);
+              return;
             }
           }
-          if (j == syncLen) {
-            in.seek(in.getPos() - SYNC_SIZE); // position before
-            // sync
-            return;
-          }
-          syncCheck[i % syncLen] = in.readByte();
+          /* move the last 16 bytes to the prefix area */
+          System.arraycopy(buffer, buffer.length - prefix - 1, buffer, 0, prefix);
+          n = (int)Math.min(n, end - in.getPos());
         }
       } catch (ChecksumException e) { // checksum failure
         handleChecksumException(e);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Tue Apr 30 19:45:38 2013
@@ -172,7 +172,7 @@ tableSample
 tableSource
 @init { gParent.msgs.push("table source"); }
 @after { gParent.msgs.pop(); }
-    : tabname=tableName (ts=tableSample)? (alias=identifier)?
+    : tabname=tableName (ts=tableSample)? (KW_AS? alias=identifier)?
     -> ^(TOK_TABREF $tabname $ts? $alias?)
     ;
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Tue Apr 30 19:45:38 2013
@@ -1878,7 +1878,7 @@ destination
 @init { msgs.push("destination specification"); }
 @after { msgs.pop(); }
    :
-     KW_LOCAL KW_DIRECTORY StringLiteral -> ^(TOK_LOCAL_DIR StringLiteral)
+     KW_LOCAL KW_DIRECTORY StringLiteral tableRowFormat? tableFileFormat? -> ^(TOK_LOCAL_DIR StringLiteral tableRowFormat? tableFileFormat?)
    | KW_DIRECTORY StringLiteral -> ^(TOK_DIR StringLiteral)
    | KW_TABLE tableOrPartition -> tableOrPartition
    ;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Tue Apr 30 19:45:38 2013
@@ -51,6 +51,7 @@ public class QB {
   private boolean isQuery;
   private boolean isAnalyzeRewrite;
   private CreateTableDesc tblDesc = null; // table descriptor of the final
+  private CreateTableDesc localDirectoryDesc = null ;
 
   // used by PTFs
   /*
@@ -227,6 +228,14 @@ public class QB {
     tblDesc = desc;
   }
 
+  public CreateTableDesc getLLocalDirectoryDesc() {
+    return localDirectoryDesc;
+  }
+
+  public void setLocalDirectoryDesc(CreateTableDesc localDirectoryDesc) {
+    this.localDirectoryDesc = localDirectoryDesc;
+  }
+
   /**
    * Whether this QB is for a CREATE-TABLE-AS-SELECT.
    */

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Apr 30 19:45:38 2013
@@ -1192,6 +1192,10 @@ public class SemanticAnalyzer extends Ba
         }
       }
 
+      RowFormatParams rowFormatParams = new RowFormatParams();
+      AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars();
+      StorageFormat storageFormat = new StorageFormat();
+
       LOG.info("Get metadata for destination tables");
       // Go over all the destination structures and populate the related
       // metadata
@@ -1279,6 +1283,45 @@ public class SemanticAnalyzer extends Ba
           }
           qb.getMetaData().setDestForAlias(name, fname,
               (ast.getToken().getType() == HiveParser.TOK_DIR));
+
+          CreateTableDesc localDirectoryDesc = new CreateTableDesc();
+          boolean localDirectoryDescIsSet = false;
+          int numCh = ast.getChildCount();
+          for (int num = 1; num < numCh ; num++){
+            ASTNode child = (ASTNode) ast.getChild(num);
+            if (ast.getChild(num) != null){
+              switch (child.getToken().getType()) {
+                case HiveParser.TOK_TABLEROWFORMAT:
+                  rowFormatParams.analyzeRowFormat(shared, child);
+                  localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
+                  localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim);
+                  localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
+                  localDirectoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
+                  localDirectoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
+                  localDirectoryDescIsSet=true;
+                  break;
+                case HiveParser.TOK_TABLESERIALIZER:
+                  ASTNode serdeChild = (ASTNode) child.getChild(0);
+                  shared.serde = unescapeSQLString(serdeChild.getChild(0).getText());
+                  localDirectoryDesc.setSerName(shared.serde);
+                  localDirectoryDescIsSet=true;
+                  break;
+                case HiveParser.TOK_TBLSEQUENCEFILE:
+                case HiveParser.TOK_TBLTEXTFILE:
+                case HiveParser.TOK_TBLRCFILE:
+                case HiveParser.TOK_TBLORCFILE:
+                case HiveParser.TOK_TABLEFILEFORMAT:
+                  storageFormat.fillStorageFormat(child, shared);
+                  localDirectoryDesc.setOutputFormat(storageFormat.outputFormat);
+                  localDirectoryDesc.setSerName(shared.serde);
+                  localDirectoryDescIsSet=true;
+                  break;
+              }
+            }
+          }
+          if (localDirectoryDescIsSet){
+            qb.setLocalDirectoryDesc(localDirectoryDesc);
+          }
           break;
         }
         default:
@@ -5180,8 +5223,7 @@ public class SemanticAnalyzer extends Ba
           String fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
           table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat);
         } else {
-          table_desc = PlanUtils.getDefaultTableDesc(Integer
-              .toString(Utilities.ctrlaCode), cols, colTypes, false);
+          table_desc = PlanUtils.getDefaultTableDesc(qb.getLLocalDirectoryDesc(), cols, colTypes);
         }
       } else {
         table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Tue Apr 30 19:45:38 2013
@@ -100,6 +100,56 @@ public final class PlanUtils {
     }
   }
 
+  public static TableDesc getDefaultTableDesc(CreateTableDesc localDirectoryDesc,
+      String cols, String colTypes ) {
+    TableDesc tableDesc = getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols,
+        colTypes, false);;
+    if (localDirectoryDesc == null) {
+      return tableDesc;
+    }
+
+    try {
+      if (localDirectoryDesc.getFieldDelim() != null) {
+        tableDesc.getProperties().setProperty(
+            serdeConstants.FIELD_DELIM, localDirectoryDesc.getFieldDelim());
+        tableDesc.getProperties().setProperty(
+            serdeConstants.SERIALIZATION_FORMAT, localDirectoryDesc.getFieldDelim());
+      }
+      if (localDirectoryDesc.getLineDelim() != null) {
+        tableDesc.getProperties().setProperty(
+            serdeConstants.LINE_DELIM, localDirectoryDesc.getLineDelim());
+      }
+      if (localDirectoryDesc.getCollItemDelim() != null) {
+        tableDesc.getProperties().setProperty(
+            serdeConstants.COLLECTION_DELIM, localDirectoryDesc.getCollItemDelim());
+      }
+      if (localDirectoryDesc.getMapKeyDelim() != null) {
+        tableDesc.getProperties().setProperty(
+            serdeConstants.MAPKEY_DELIM, localDirectoryDesc.getMapKeyDelim());
+      }
+      if (localDirectoryDesc.getFieldEscape() !=null) {
+        tableDesc.getProperties().setProperty(
+            serdeConstants.ESCAPE_CHAR, localDirectoryDesc.getFieldEscape());
+      }
+      if (localDirectoryDesc.getSerName() != null) {
+        tableDesc.setSerdeClassName(localDirectoryDesc.getSerName());
+        tableDesc.getProperties().setProperty(
+            serdeConstants.SERIALIZATION_LIB, localDirectoryDesc.getSerName());
+        tableDesc.setDeserializerClass(
+            (Class<? extends Deserializer>) Class.forName(localDirectoryDesc.getSerName()));
+      }
+      if (localDirectoryDesc.getOutputFormat() != null){
+          tableDesc.setOutputFileFormatClass(Class.forName(localDirectoryDesc.getOutputFormat()));
+      }
+    } catch (ClassNotFoundException e) {
+      // mimicking behaviour in CreateTableDesc tableDesc creation
+      // returning null table description for output.
+      e.printStackTrace();
+      return null;
+    }
+    return tableDesc;
+  }
+
   /**
    * Generate the table descriptor of MetadataTypedColumnsetSerDe with the
    * separatorCode and column names (comma separated string).

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java Tue Apr 30 19:45:38 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UD
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -303,6 +304,14 @@ public class GenericUDAFAverage extends 
       }
     }
 
+    @AggregationType(estimable = true)
+    static class AverageAgg extends AbstractAggregationBuffer {
+      long count;
+      double sum;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 2; }
+    };
+
     @Override
     public void reset(AggregationBuffer aggregation) throws HiveException {
       doReset((AverageAggregationBuffer<TYPE>)aggregation);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java Tue Apr 30 19:45:38 2013
@@ -147,7 +147,7 @@ public class GenericUDAFBridge extends A
     }
 
     /** class for storing UDAFEvaluator value. */
-    static class UDAFAgg implements AggregationBuffer {
+    static class UDAFAgg extends AbstractAggregationBuffer {
       UDAFEvaluator ueObject;
 
       UDAFAgg(UDAFEvaluator ueObject) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java Tue Apr 30 19:45:38 2013
@@ -99,7 +99,7 @@ public class GenericUDAFCollectSet exten
       }
     }
     
-    static class MkArrayAggregationBuffer implements AggregationBuffer {
+    static class MkArrayAggregationBuffer extends AbstractAggregationBuffer {
       Set<Object> container;
     }
     

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Tue Apr 30 19:45:38 2013
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -185,11 +186,17 @@ public class GenericUDAFComputeStats ext
             foi);
     }
 
-    public static class BooleanStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class BooleanStatsAgg extends AbstractAggregationBuffer {
       public String columnType;                        /* Datatype of column */
       public long countTrues;  /* Count of number of true values seen so far */
       public long countFalses; /* Count of number of false values seen so far */
       public long countNulls;  /* Count of number of null values seen so far */
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive2() * 3 + model.lengthFor(columnType);
+      }
     };
 
     @Override
@@ -426,7 +433,9 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class LongStatsAgg implements AggregationBuffer {
+
+    @AggregationType(estimable = true)
+    public static class LongStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public long min;                              /* Minimum value seen so far */
       public long max;                              /* Maximum value seen so far */
@@ -434,6 +443,12 @@ public class GenericUDAFComputeStats ext
       public LongNumDistinctValueEstimator numDV;    /* Distinct value estimator */
       public boolean firstItem;                     /* First item in the aggBuf? */
       public int numBitVectors;
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive1() * 2 + model.primitive2() * 3 +
+            model.lengthFor(columnType) + model.lengthFor(numDV);
+      }
     };
 
     @Override
@@ -738,7 +753,8 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class DoubleStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class DoubleStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public double min;                            /* Minimum value seen so far */
       public double max;                            /* Maximum value seen so far */
@@ -746,6 +762,12 @@ public class GenericUDAFComputeStats ext
       public DoubleNumDistinctValueEstimator numDV;  /* Distinct value estimator */
       public boolean firstItem;                     /* First item in the aggBuf? */
       public int numBitVectors;
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive1() * 2 + model.primitive2() * 3 +
+            model.lengthFor(columnType) + model.lengthFor(numDV);
+      }
     };
 
     @Override
@@ -1061,7 +1083,8 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class StringStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class StringStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public long maxLength;                           /* Maximum length seen so far */
       public long sumLength;             /* Sum of lengths of all values seen so far */
@@ -1070,6 +1093,12 @@ public class GenericUDAFComputeStats ext
       public StringNumDistinctValueEstimator numDV;      /* Distinct value estimator */
       public int numBitVectors;
       public boolean firstItem;
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive1() * 2 + model.primitive2() * 4 +
+            model.lengthFor(columnType) + model.lengthFor(numDV);
+      }
     };
 
     @Override
@@ -1377,12 +1406,18 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class BinaryStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class BinaryStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public long maxLength;                           /* Maximum length seen so far */
       public long sumLength;             /* Sum of lengths of all values seen so far */
       public long count;                          /* Count of all values seen so far */
       public long countNulls;          /* Count of number of null values seen so far */
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive2() * 4 + model.lengthFor(columnType);
+      }
     };
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java Tue Apr 30 19:45:38 2013
@@ -400,7 +400,7 @@ public class GenericUDAFContextNGrams im
 
 
     // Aggregation buffer methods. 
-    static class NGramAggBuf implements AggregationBuffer {
+    static class NGramAggBuf extends AbstractAggregationBuffer {
       ArrayList<String> context;
       NGramEstimator nge;
     };

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -247,13 +248,16 @@ public class GenericUDAFCorrelation exte
       }
     }
 
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       long count; // number n of elements
       double xavg; // average of x elements
       double yavg; // average of y elements
       double xvar; // n times the variance of x elements
       double yvar; // n times the variance of y elements
       double covar; // n times the covariance
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 6; }
     };
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java Tue Apr 30 19:45:38 2013
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -99,8 +100,11 @@ public class GenericUDAFCount implements
     }
 
     /** class for storing count value. */
-    static class CountAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class CountAgg extends AbstractAggregationBuffer {
       long value;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2; }
     }
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -38,7 +39,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Compute the covariance covar_pop(x, y), using the following one-pass method
@@ -224,11 +224,14 @@ public class GenericUDAFCovariance exten
       }
     }
 
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       long count; // number n of elements
       double xavg; // average of x elements
       double yavg; // average of y elements
       double covar; // n times the covariance
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 4; }
     };
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java Tue Apr 30 19:45:38 2013
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
 
 import javaewah.EWAHCompressedBitmap;
 
@@ -109,8 +108,13 @@ public class GenericUDAFEWAHBitmap exten
     }
 
     /** class for storing the current partial result aggregation */
-    static class BitmapAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class BitmapAgg extends AbstractAggregationBuffer {
       EWAHCompressedBitmap bitmap;
+      @Override
+      public int estimate() {
+        return bitmap.sizeInBytes();
+      }
     }
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java Tue Apr 30 19:45:38 2013
@@ -41,6 +41,19 @@ import org.apache.hadoop.hive.serde2.obj
 @UDFType(deterministic = true)
 public abstract class GenericUDAFEvaluator implements Closeable {
 
+  public static @interface AggregationType {
+    boolean estimable() default false;
+  }
+
+  public static boolean isEstimable(AggregationBuffer buffer) {
+    if (buffer instanceof AbstractAggregationBuffer) {
+      Class<? extends AggregationBuffer> clazz = buffer.getClass();
+      AggregationType annotation = clazz.getAnnotation(AggregationType.class);
+      return annotation != null && annotation.estimable();
+    }
+    return false;
+  }
+
   /**
    * Mode.
    *
@@ -123,10 +136,21 @@ public abstract class GenericUDAFEvaluat
    * 
    * In the future, we may completely hide this class inside the Evaluator and
    * use integer numbers to identify which aggregation we are looking at.
+   *
+   * @deprecated use {@link AbstractAggregationBuffer} instead
    */
   public static interface AggregationBuffer {
   };
 
+  public static abstract class AbstractAggregationBuffer implements AggregationBuffer {
+    /**
+     * Estimate the size of memory which is occupied by aggregation buffer.
+     * Currently, Hive assumes that each primitive type occupies 16 bytes and each Java object
+     * has a 64-byte overhead. For a map, each entry also has a 64-byte overhead.
+     */
+    public int estimate() { return -1; }
+  }
+
   /**
    * Get a new aggregation object.
    */

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java Tue Apr 30 19:45:38 2013
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -27,21 +26,16 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Computes an approximate histogram of a numerical column using a user-specified number of bins.
@@ -235,8 +229,13 @@ public class GenericUDAFHistogramNumeric
 
 
     // Aggregation buffer definition and manipulation methods
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       NumericHistogram histogram; // the histogram object
+      @Override
+      public int estimate() {
+        return JavaDataModel.get().lengthFor(histogram);
+      }
     };
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java Tue Apr 30 19:45:38 2013
@@ -71,7 +71,7 @@ public class GenericUDAFMax extends Abst
     }
 
     /** class for storing the current max value */
-    static class MaxAgg implements AggregationBuffer {
+    static class MaxAgg extends AbstractAggregationBuffer {
       Object o;
     }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java Tue Apr 30 19:45:38 2013
@@ -71,7 +71,7 @@ public class GenericUDAFMin extends Abst
     }
 
     /** class for storing the current max value */
-    static class MinAgg implements AggregationBuffer {
+    static class MinAgg extends AbstractAggregationBuffer {
       Object o;
     }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java Tue Apr 30 19:45:38 2013
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -33,15 +34,9 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Computes an approximate percentile (quantile) from an approximate histogram, for very
@@ -353,9 +348,16 @@ public class GenericUDAFPercentileApprox
 
     // Aggregation buffer methods. We wrap GenericUDAFHistogramNumeric's aggregation buffer
     // inside our own, so that we can also store requested quantile values between calls
-    static class PercentileAggBuf implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class PercentileAggBuf extends AbstractAggregationBuffer {
       NumericHistogram histogram;   // histogram used for quantile approximation
       double[] quantiles;           // the quantiles requested
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.lengthFor(histogram) +
+            model.array() + JavaDataModel.PRIMITIVES2 * quantiles.length;
+      }
     };
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java Tue Apr 30 19:45:38 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -96,7 +97,8 @@ public class GenericUDAFSum extends Abst
     }
 
     /** class for storing decimal sum value. */
-    static class SumHiveDecimalAgg implements AggregationBuffer {
+    @AggregationType(estimable = false) // hard to know exactly for decimals
+    static class SumHiveDecimalAgg extends AbstractAggregationBuffer {
       boolean empty;
       HiveDecimal sum;
     }
@@ -188,9 +190,12 @@ public class GenericUDAFSum extends Abst
     }
 
     /** class for storing double sum value. */
-    static class SumDoubleAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class SumDoubleAgg extends AbstractAggregationBuffer {
       boolean empty;
       double sum;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2; }
     }
 
     @Override
@@ -270,9 +275,12 @@ public class GenericUDAFSum extends Abst
     }
 
     /** class for storing double sum value. */
-    static class SumLongAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class SumLongAgg extends AbstractAggregationBuffer {
       boolean empty;
       long sum;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2; }
     }
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java Tue Apr 30 19:45:38 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -171,10 +172,13 @@ public class GenericUDAFVariance extends
       }
     }
 
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       long count; // number of elements
       double sum; // sum of elements
       double variance; // sum[x-avg^2] (this is actually n times the variance)
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 3; }
     };
 
     @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java Tue Apr 30 19:45:38 2013
@@ -338,7 +338,7 @@ public class GenericUDAFnGrams implement
     }
 
     // Aggregation buffer methods. 
-    static class NGramAggBuf implements AggregationBuffer {
+    static class NGramAggBuf extends AbstractAggregationBuffer {
       NGramEstimator nge;
       int n;
     };

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java Tue Apr 30 19:45:38 2013
@@ -313,4 +313,8 @@ public class NumericHistogram {
 
     return result;
   }
+
+  public int getNumBins() {
+    return bins == null ? 0 : bins.size();
+  }
 }

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/input4.q Tue Apr 30 19:45:38 2013
@@ -3,6 +3,6 @@ EXPLAIN
 LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE INPUT4;
 LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE INPUT4;
 EXPLAIN FORMATTED
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4;
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4;
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias;
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/join39.q Tue Apr 30 19:45:38 2013
@@ -1,7 +1,3 @@
-set hive.mapjoin.cache.numrows = 2;
-
-
-
 CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE;
 
 explain

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/join40.q Tue Apr 30 19:45:38 2013
@@ -29,7 +29,6 @@ SORT BY src1.key, src1.value, src2.key, 
 SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20)
 SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value;
 
-set hive.mapjoin.cache.numrows=2;
 
 EXPLAIN 
 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/mapjoin1.q Tue Apr 30 19:45:38 2013
@@ -1,5 +1,3 @@
-set hive.mapjoin.cache.numrows=100;
-
 SELECT  /*+ MAPJOIN(b) */ sum(a.key) as sum_a
     FROM srcpart a
     JOIN src b ON a.key = b.key where a.ds is not null;

Modified: hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/beelinepositive/join39.q.out Tue Apr 30 19:45:38 2013
@@ -1,10 +1,5 @@
 Saving all output to "!!{outputDirectory}!!/join39.q.raw". Enter "record" with no arguments to stop it.
 >>>  !run !!{qFileDirectory}!!/join39.q
->>>  set hive.mapjoin.cache.numrows = 2;
-No rows affected 
->>>  
->>>  
->>>  
 >>>  CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE;
 No rows affected 
 >>>  

Modified: hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/beelinepositive/join40.q.out Tue Apr 30 19:45:38 2013
@@ -3205,8 +3205,6 @@ SORT BY src1.key, src1.value, src2.key, 
 '9','val_9','9','val_9','9','val_9'
 548 rows selected 
 >>>  
->>>  set hive.mapjoin.cache.numrows=2;
-No rows affected 
 >>>  
 >>>  EXPLAIN 
 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value 

Modified: hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/beelinepositive/mapjoin1.q.out Tue Apr 30 19:45:38 2013
@@ -1,8 +1,5 @@
 Saving all output to "!!{outputDirectory}!!/mapjoin1.q.raw". Enter "record" with no arguments to stop it.
 >>>  !run !!{qFileDirectory}!!/mapjoin1.q
->>>  set hive.mapjoin.cache.numrows=100;
-No rows affected 
->>>  
 >>>  SELECT  /*+ MAPJOIN(b) */ sum(a.key) as sum_a 
 FROM srcpart a 
 JOIN src b ON a.key = b.key where a.ds is not null;

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/input4.q.out Tue Apr 30 19:45:38 2013
@@ -43,17 +43,17 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH 
 POSTHOOK: type: LOAD
 POSTHOOK: Output: default@input4
 PREHOOK: query: EXPLAIN FORMATTED
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN FORMATTED
-SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
 POSTHOOK: type: QUERY
-{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Split Sample:":{},"Alias -> Map Operator Tree:":{"input4":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) KEY)))))"}
-PREHOOK: query: SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Split Sample:":{},"Alias -> Map Operator Tree:":{"input4alias":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4) Input4Alias)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Input4Alias) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Input4Alias) KEY)))))"}
+PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
 PREHOOK: type: QUERY
 PREHOOK: Input: default@input4
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
+POSTHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@input4
 #### A masked pattern was here ####

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out?rev=1477796&r1=1477795&r2=1477796&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/show_functions.q.out Tue Apr 30 19:45:38 2013
@@ -110,7 +110,10 @@ month
 named_struct
 negative
 ngrams
+noop
+noopwithmap
 not
+npath
 ntile
 nvl
 or
@@ -174,6 +177,7 @@ var_samp
 variance
 weekofyear
 when
+windowingtablefunction
 xpath
 xpath_boolean
 xpath_double



Mime
View raw message