hive-commits mailing list archives

From hashut...@apache.org
Subject svn commit: r1501145 [6/13] - in /hive/branches/vectorization: ./ beeline/src/java/org/apache/hive/beeline/ beeline/src/test/org/apache/hive/beeline/src/test/ cli/src/java/org/apache/hadoop/hive/cli/ common/src/java/org/apache/hadoop/hive/common/metric...
Date Tue, 09 Jul 2013 09:07:43 GMT
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/FunctionWork.java Tue Jul  9 09:07:35 2013
@@ -28,6 +28,8 @@ public class FunctionWork implements Ser
   private static final long serialVersionUID = 1L;
   private CreateFunctionDesc createFunctionDesc;
   private DropFunctionDesc dropFunctionDesc;
+  private CreateMacroDesc createMacroDesc;
+  private DropMacroDesc dropMacroDesc;
 
   /**
    * For serialization only.
@@ -43,6 +45,14 @@ public class FunctionWork implements Ser
     this.dropFunctionDesc = dropFunctionDesc;
   }
 
+  public FunctionWork(CreateMacroDesc createMacroDesc) {
+    this.createMacroDesc = createMacroDesc;
+  }
+
+  public FunctionWork(DropMacroDesc dropMacroDesc) {
+    this.dropMacroDesc = dropMacroDesc;
+  }
+
   public CreateFunctionDesc getCreateFunctionDesc() {
     return createFunctionDesc;
   }
@@ -59,4 +69,12 @@ public class FunctionWork implements Ser
     this.dropFunctionDesc = dropFunctionDesc;
   }
 
+  public CreateMacroDesc getCreateMacroDesc() {
+    return createMacroDesc;
+  }
+
+  public DropMacroDesc getDropMacroDesc() {
+    return dropMacroDesc;
+  }
+
 }
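
The new descriptors follow the existing FunctionWork convention: exactly one of
the desc fields is non-null per instance, and the executing task dispatches on
whichever is set. A minimal sketch of that dispatch, using only the getters
above (the handler names are hypothetical; in Hive the consumer is
FunctionTask):

    static void dispatch(FunctionWork work) {
      if (work.getCreateMacroDesc() != null) {
        handleCreateMacro(work.getCreateMacroDesc());   // hypothetical handler
      } else if (work.getDropMacroDesc() != null) {
        handleDropMacro(work.getDropMacroDesc());       // hypothetical handler
      }
    }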

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java Tue Jul  9 09:07:35 2013
@@ -66,6 +66,8 @@ public enum HiveOperation {
   SHOWLOCKS("SHOWLOCKS", null, null),
   CREATEFUNCTION("CREATEFUNCTION", null, null),
   DROPFUNCTION("DROPFUNCTION", null, null),
+  CREATEMACRO("CREATEMACRO", null, null),
+  DROPMACRO("DROPMACRO", null, null),
   CREATEVIEW("CREATEVIEW", null, null),
   DROPVIEW("DROPVIEW", null, null),
   CREATEINDEX("CREATEINDEX", null, null),

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Tue Jul  9 09:07:35 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.plan;
 
 import java.io.ByteArrayOutputStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -111,6 +112,12 @@ public class MapredWork extends Abstract
   private final Map<String, List<SortCol>> sortedColsByDirectory =
       new HashMap<String, List<SortCol>>();
 
+  // use sampled partitioning
+  private int samplingType;
+
+  public static final int SAMPLING_ON_PREV_MR = 1;  // todo HIVE-3841
+  public static final int SAMPLING_ON_START = 2;    // sampling on task running
+
   public MapredWork() {
     aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
   }
@@ -223,6 +230,33 @@ public class MapredWork extends Abstract
     this.aliasToWork = aliasToWork;
   }
 
+  public void mergeAliasedInput(String alias, String pathDir, PartitionDesc partitionInfo) {
+    ArrayList<String> aliases = pathToAliases.get(pathDir);
+    if (aliases == null) {
+      aliases = new ArrayList<String>(Arrays.asList(alias));
+      pathToAliases.put(pathDir, aliases);
+      pathToPartitionInfo.put(pathDir, partitionInfo);
+    } else {
+      aliases.add(alias);
+    }
+  }
+
+  public ArrayList<String> getAliases() {
+    return new ArrayList<String>(aliasToWork.keySet());
+  }
+
+  public ArrayList<Operator<?>> getWorks() {
+    return new ArrayList<Operator<?>>(aliasToWork.values());
+  }
+
+  public ArrayList<String> getPaths() {
+    return new ArrayList<String>(pathToAliases.keySet());
+  }
+
+  public ArrayList<PartitionDesc> getPartitionDescs() {
+    return new ArrayList<PartitionDesc>(aliasToPartnInfo.values());
+  }
+
   /**
    * @return the mapredLocalWork
    */
@@ -582,4 +616,18 @@ public class MapredWork extends Abstract
       }
     }
   }
+
+  public int getSamplingType() {
+    return samplingType;
+  }
+
+  public void setSamplingType(int samplingType) {
+    this.samplingType = samplingType;
+  }
+
+  @Explain(displayName = "Sampling")
+  public String getSamplingTypeString() {
+    return samplingType == 1 ? "SAMPLING_ON_PREV_MR" :
+        samplingType == 2 ? "SAMPLING_ON_START" : null;
+  }
 }
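
mergeAliasedInput lets a second table alias reuse a path that is already
registered as an input, instead of adding the directory twice. A minimal usage
sketch, assuming the work's pathToAliases/pathToPartitionInfo maps have been
initialized, with partDesc as a placeholder PartitionDesc:

    work.mergeAliasedInput("subq1:t", "/warehouse/t", partDesc); // first alias registers the path
    work.mergeAliasedInput("subq2:t", "/warehouse/t", partDesc); // same path: alias appended only
    // pathToAliases now maps "/warehouse/t" -> ["subq1:t", "subq2:t"],
    // and the partition info for the path is stored exactly once.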

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java Tue Jul  9 09:07:35 2013
@@ -188,6 +188,16 @@ public class PartitionDesc implements Se
     return properties;
   }
 
+  public java.util.Properties getOverlayedProperties(){
+    if (tableDesc != null) {
+      Properties overlayedProps = new Properties(tableDesc.getProperties());
+      overlayedProps.putAll(getProperties());
+      return overlayedProps;
+    } else {
+      return getProperties();
+    }
+  }
+
   public void setProperties(final java.util.Properties properties) {
     this.properties = properties;
   }
@@ -232,6 +242,10 @@ public class PartitionDesc implements Se
     return baseFileName;
   }
 
+  public boolean isPartitioned() {
+    return partSpec != null && !partSpec.isEmpty();
+  }
+
   @Override
   public PartitionDesc clone() {
     PartitionDesc ret = new PartitionDesc();
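
getOverlayedProperties leans on java.util.Properties defaults: partition-level
keys shadow table-level values, and unset keys fall through to the table
descriptor. A self-contained JDK demo of that lookup order:

    import java.util.Properties;

    public class OverlayDemo {
      public static void main(String[] args) {
        Properties table = new Properties();
        table.setProperty("serialization.format", "1");
        table.setProperty("columns", "key,value");

        Properties overlay = new Properties(table);    // table props act as defaults
        overlay.setProperty("serialization.format", "2");

        System.out.println(overlay.getProperty("serialization.format")); // 2 (partition wins)
        System.out.println(overlay.getProperty("columns")); // key,value (falls back to table)
      }
    }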

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Tue Jul  9 09:07:35 2013
@@ -270,7 +270,11 @@ public final class PlanUtils {
       String fileFormat) {
     TableDesc tblDesc = getTableDesc(LazySimpleSerDe.class, "" + Utilities.ctrlaCode, cols, colTypes,
         false, false, fileFormat);
+    //enable escaping
     tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR, "\\");
+    //enable extended nesting levels
+    tblDesc.getProperties().setProperty(
+        LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true");    
     return tblDesc;
   }
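
The second property matters for the intermediate tables used between stages:
by default LazySimpleSerDe reserves only a small fixed set of control-character
separators for nested types, and SERIALIZATION_EXTEND_NESTING_LEVELS raises
that limit so deeply nested values, such as the STRUCT group-by keys exercised
by groupby_complex_types.q below, survive serialization.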
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java Tue Jul  9 09:07:35 2013
@@ -31,12 +31,14 @@ public class UDTFDesc extends AbstractOp
   private static final long serialVersionUID = 1L;
 
   private GenericUDTF genericUDTF;
+  private boolean outerLV;
 
   public UDTFDesc() {
   }
 
-  public UDTFDesc(final GenericUDTF genericUDTF) {
+  public UDTFDesc(final GenericUDTF genericUDTF, boolean outerLV) {
     this.genericUDTF = genericUDTF;
+    this.outerLV = outerLV;
   }
 
   public GenericUDTF getGenericUDTF() {
@@ -51,4 +53,17 @@ public class UDTFDesc extends AbstractOp
   public String getUDTFName() {
     return genericUDTF.toString();
   }
+
+  public boolean isOuterLV() {
+    return outerLV;
+  }
+
+  public void setOuterLV(boolean outerLV) {
+    this.outerLV = outerLV;
+  }
+
+  @Explain(displayName = "outer lateral view")
+  public String isOuterLateralView() {
+    return outerLV ? "true" : null;
+  }
 }
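
isOuterLateralView() relies on the usual @Explain convention: a getter that
returns null is omitted from EXPLAIN output, so the "outer lateral view" line
appears only when the flag is set. getSamplingTypeString() in MapredWork above
uses the same trick for the sampling type.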

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Tue Jul  9 09:07:35 2013
@@ -22,16 +22,15 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
-import java.lang.management.ManagementFactory;
 import java.net.URI;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.UUID;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.StringUtils;
@@ -303,9 +302,7 @@ public class SessionState {
    * @return the unique string
    */
   private static String makeSessionId() {
-    String userid = System.getProperty("user.name");
-    return userid + "_" + ManagementFactory.getRuntimeMXBean().getName() + "_"
-        + DATE_FORMAT.format(new Date());
+    return UUID.randomUUID().toString();
   }
 
   /**
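
The replaced id concatenated user.name, the JVM name from the RuntimeMXBean,
and a formatted timestamp, so two sessions opened in the same JVM within the
date format's resolution could collide; a random UUID is unique without
depending on process identity or clock granularity.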

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java Tue Jul  9 09:07:35 2013
@@ -238,7 +238,9 @@ public class JDBCStatsPublisher implemen
         try {
           // The following closes the derby connection. It throws an exception that has to be caught
           // and ignored.
-          DriverManager.getConnection(connectionString + ";shutdown=true");
+          synchronized(DriverManager.class) {
+            DriverManager.getConnection(connectionString + ";shutdown=true");
+          }
         } catch (Exception e) {
           // Do nothing because we know that an exception is thrown anyway.
         }
@@ -261,22 +263,24 @@ public class JDBCStatsPublisher implemen
       connectionString = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESTATSDBCONNECTIONSTRING);
       String driver = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESTATSJDBCDRIVER);
       Class.forName(driver).newInstance();
-      DriverManager.setLoginTimeout(timeout);
-      conn = DriverManager.getConnection(connectionString);
+      synchronized(DriverManager.class) {
+        DriverManager.setLoginTimeout(timeout);
+        conn = DriverManager.getConnection(connectionString);
 
-      Statement stmt = conn.createStatement();
-      stmt.setQueryTimeout(timeout);
+        Statement stmt = conn.createStatement();
+        stmt.setQueryTimeout(timeout);
 
-      // Check if the table exists
-      DatabaseMetaData dbm = conn.getMetaData();
-      ResultSet rs = dbm.getTables(null, null, JDBCStatsUtils.getStatTableName(), null);
-      boolean tblExists = rs.next();
-      if (!tblExists) { // Table does not exist, create it
-        String createTable = JDBCStatsUtils.getCreate("");
-        stmt.executeUpdate(createTable);
-        stmt.close();
+        // Check if the table exists
+        DatabaseMetaData dbm = conn.getMetaData();
+        ResultSet rs = dbm.getTables(null, null, JDBCStatsUtils.getStatTableName(), null);
+        boolean tblExists = rs.next();
+        if (!tblExists) { // Table does not exist, create it
+          String createTable = JDBCStatsUtils.getCreate("");
+          stmt.executeUpdate(createTable);
+          stmt.close();
+        }
+        closeConnection();
       }
-      closeConnection();
     } catch (Exception e) {
       LOG.error("Error during JDBC initialization. ", e);
       return false;
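
Both hunks wrap the same JVM-wide race: DriverManager.setLoginTimeout mutates
global state, so without a lock one thread's timeout could be overwritten by
another thread between the set and the getConnection. The essential pattern,
as a standalone sketch:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;

    static Connection connectWithTimeout(String url, int timeoutSecs) throws SQLException {
      // The set + connect pair must not interleave with another thread
      // doing the same with a different timeout value.
      synchronized (DriverManager.class) {
        DriverManager.setLoginTimeout(timeoutSecs);
        return DriverManager.getConnection(url);
      }
    }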

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java Tue Jul  9 09:07:35 2013
@@ -149,7 +149,7 @@ public class UDAFPercentile extends UDAF
       }
       if (state.percentiles == null) {
         if (percentile < 0.0 || percentile > 1.0) {
-          throw new RuntimeException("Percentile value must be wihin the range of 0 to 1.");
+          throw new RuntimeException("Percentile value must be within the range of 0 to 1.");
         }
         state.percentiles = new ArrayList<DoubleWritable>(1);
         state.percentiles.add(new DoubleWritable(percentile.doubleValue()));
@@ -238,7 +238,7 @@ public class UDAFPercentile extends UDAF
         if(percentiles != null) {
           for (int i = 0; i < percentiles.size(); i++) {
             if (percentiles.get(i).get() < 0.0 || percentiles.get(i).get() > 1.0) {
-              throw new RuntimeException("Percentile value must be wihin the range of 0 to 1.");
+              throw new RuntimeException("Percentile value must be within the range of 0 to 1.");
             }
           }
           state.percentiles = new ArrayList<DoubleWritable>(percentiles);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java Tue Jul  9 09:07:35 2013
@@ -1,27 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.hadoop.hive.ql.udf.generic;
 
-import java.util.List;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
+import java.util.ArrayList;
+
 @Description(name ="inline", value= "_FUNC_( ARRAY( STRUCT()[,STRUCT()] "
 + "- explodes and array and struct into a table")
 public class GenericUDTFInline extends GenericUDTF {
 
-  private Object[] forwardObj;
   private ListObjectInspector li;
-  private StructObjectInspector daStruct;
 
   public GenericUDTFInline(){
-
   }
 
   @Override
@@ -39,29 +52,13 @@ public class GenericUDTFInline extends G
     if (sub.getCategory() != Category.STRUCT){
       throw new UDFArgumentException("The sub element must be struct, but was "+sub.getTypeName());
     }
-    daStruct = (StructObjectInspector) sub;
-    forwardObj = new Object[daStruct.getAllStructFieldRefs().size()];
-    return daStruct;
+    return (StructObjectInspector) sub;
   }
 
   @Override
   public void process(Object[] os) throws HiveException {
-    //list is always one item
-    List l = li.getList(os);
-    List<? extends StructField> fields = this.daStruct.getAllStructFieldRefs();
-    for (Object linner: l ){
-      List<List> innerList = (List) linner;
-      for (List rowList : innerList){
-        int i=0;
-        for (StructField f: fields){
-          GenericUDFUtils.ReturnObjectInspectorResolver res
-            = new GenericUDFUtils.ReturnObjectInspectorResolver();
-          res.update(f.getFieldObjectInspector());
-          this.forwardObj[i]=res.convertIfNecessary(rowList.get(i), f.getFieldObjectInspector());
-          i++;
-        }
-        forward(this.forwardObj);
-      }
+    for (Object row : new ArrayList<Object>(li.getList(os[0]))) {
+      forward(row);
     }
   }
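
The rewrite drops the per-row ObjectInspector conversion: initialize() now
returns the array element's own StructObjectInspector, so process() can forward
each struct unchanged; copying the list into a fresh ArrayList appears intended
to guard against the inspector reusing the backing object while rows are being
forwarded. In HiveQL terms, inline(array(struct('a',1),struct('b',2))) now
yields one output row per struct.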
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java Tue Jul  9 09:07:35 2013
@@ -28,7 +28,13 @@ public class NumDistinctValueEstimator {
 
   static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName());
 
-  private final int bitVectorSize = 32;
+  /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
+   * 2^p - 1 is prime for p = 31, hence bitVectorSize has to be 31. Pick k to be 2^p - 1.
+   * If a,b,x didn't come from a finite field, a*x1 + b mod k and a*x2 + b mod k would not be
+   * pairwise independent. As a consequence, the hash values would not distribute uniformly
+   * from 0 to 2^p - 1, thus introducing errors in the estimates.
+   */
+  private static final int bitVectorSize = 31;
   private int numBitVectors;
 
   // Refer to Flajolet-Martin'86 for the value of phi
@@ -53,8 +59,23 @@ public class NumDistinctValueEstimator {
     a = new int[numBitVectors];
     b = new int[numBitVectors];
 
-    aValue = new Random(79798);
-    bValue = new Random(34115);
+    /* Use a large prime number as a seed to the random number generator.
+     * Java's random number generator uses the Linear Congruential Generator to generate random
+     * numbers using the following recurrence relation,
+     *
+     * X(n+1) = (a X(n) + c ) mod m
+     *
+     *  where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48
+     *  is not a prime number and hence the set of numbers from 0 to m don't form a finite field.
+     *  If these numbers don't come from a finite field, any given X(n) and X(n+1) may not be
+     *  pairwise independent.
+     *
+     *  However, empirically, passing in prime numbers as seeds seems to work better than passing
+     *  composite numbers as seeds. Ideally Java's Random should pick m such that m is prime.
+     *
+     */
+    aValue = new Random(99397);
+    bValue = new Random(9876413);
 
     for (int i = 0; i < numBitVectors; i++) {
       int randVal;
@@ -76,11 +97,11 @@ public class NumDistinctValueEstimator {
       b[i] = randVal;
 
       if (a[i] < 0) {
-        a[i] = a[i] + (1 << (bitVectorSize -1));
+        a[i] = a[i] + (1 << bitVectorSize - 1);
       }
 
       if (b[i] < 0) {
-        b[i] = b[i] + (1 << (bitVectorSize -1));
+        b[i] = b[i] + (1 << bitVectorSize - 1);
       }
     }
   }
@@ -197,8 +218,8 @@ public class NumDistinctValueEstimator {
   }
 
   private int generateHash(long v, int hashNum) {
-    int mod = 1 << (bitVectorSize - 1) - 1;
-    long tempHash = a[hashNum] * v + b[hashNum];
+    int mod = (1<<bitVectorSize) - 1;
+    long tempHash = a[hashNum] * v  + b[hashNum];
     tempHash %= mod;
     int hash = (int) tempHash;
 
@@ -206,7 +227,7 @@ public class NumDistinctValueEstimator {
      * Hence hash value has to be non-negative.
      */
     if (hash < 0) {
-      hash = hash + mod + 1;
+      hash = hash + mod;
     }
     return hash;
   }
@@ -266,6 +287,7 @@ public class NumDistinctValueEstimator {
     bitVector[hash%numBitVectors].set(index);
   }
 
+
   public void mergeEstimators(NumDistinctValueEstimator o) {
     // Bitwise OR the bitvector with the bitvector in the agg buffer
     for (int i=0; i<numBitVectors; i++) {
@@ -289,36 +311,22 @@ public class NumDistinctValueEstimator {
     return ((long)numDistinctValues);
   }
 
-  /* We use two estimators - one due to Flajolet-Martin and a modification due to
-   * Alon-Matias-Szegedy. FM uses the location of the least significant zero as an estimate of
-   * log2(phi*ndvs).
-   * AMS uses the location of the most significant one as an estimate of the log2(ndvs).
-   * We average the two estimators with suitable modifications to obtain an estimate of ndvs.
+  /* We use the Flajolet-Martin estimator to estimate the number of distinct values. FM uses
+   * the location of the least significant zero as an estimate of log2(phi*ndvs).
    */
   public long estimateNumDistinctValues() {
     int sumLeastSigZero = 0;
-    int sumMostSigOne = 0;
     double avgLeastSigZero;
-    double avgMostSigOne;
     double numDistinctValues;
 
     for (int i=0; i< numBitVectors; i++) {
       int leastSigZero = bitVector[i].nextClearBit(0);
       sumLeastSigZero += leastSigZero;
-      int mostSigOne = bitVectorSize;
-
-      for (int j=0; j< bitVectorSize; j++) {
-        if (bitVector[i].get(j)) {
-          mostSigOne = j;
-        }
-      }
-      sumMostSigOne += mostSigOne;
     }
 
     avgLeastSigZero =
         (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(phi)/Math.log(2.0));
-    avgMostSigOne = (double)(sumMostSigOne/(numBitVectors * 1.0));
-    numDistinctValues = Math.pow(2.0, (avgMostSigOne + avgLeastSigZero)/2.0);
+    numDistinctValues = Math.pow(2.0, avgLeastSigZero);
     return ((long)(numDistinctValues));
   }
 }
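
Taken together, the changes make the hash family consistent: a[i] and b[i] are
drawn below 2^31 - 1, and generateHash now computes h(v) = (a*v + b) mod
(2^31 - 1), the classic pairwise-independent construction over a Mersenne-prime
field. A compact sketch mirroring the corrected arithmetic:

    // h(v) = (a*v + b) mod p with p = 2^31 - 1 (prime), as in generateHash above.
    static int fmHash(long v, int a, int b) {
      final int p = (1 << 31) - 1;   // int overflow wraps this to Integer.MAX_VALUE = 2^31 - 1
      long h = (a * v + b) % p;
      if (h < 0) {
        h += p;                      // Java's % takes the sign of the dividend
      }
      return (int) h;
    }

With uniformly distributed hashes, the Flajolet-Martin estimate follows from
the average position R of the least significant zero bit across the bit
vectors: ndv ~= 2^(R - log2(phi)), which is exactly what the simplified
estimateNumDistinctValues computes once the AMS branch is removed.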

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UDTFCollector.java Tue Jul  9 09:07:35 2013
@@ -33,7 +33,8 @@ public class UDTFCollector implements Co
    * @see
    * org.apache.hadoop.hive.ql.udf.generic.Collector#collect(java.lang.Object)
    */
-  UDTFOperator op = null;
+  final UDTFOperator op;
+  private transient int counter;
 
   public UDTFCollector(UDTFOperator op) {
     this.op = op;
@@ -42,6 +43,14 @@ public class UDTFCollector implements Co
   @Override
   public void collect(Object input) throws HiveException {
     op.forwardUDTFOutput(input);
+    counter++;
   }
 
+  public int getCounter() {
+    return counter;
+  }
+
+  public void reset() {
+    counter = 0;
+  }
 }
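
The counter gives the enclosing UDTFOperator a cheap way to notice that a
process() call forwarded no rows, which is what the outer-lateral-view flag
added to UDTFDesc above needs. A hedged sketch of the intended call pattern
(forwardNullRow() is a hypothetical stand-in for padding the output with
NULLs):

    collector.reset();
    genericUDTF.process(args);
    if (conf.isOuterLV() && collector.getCounter() == 0) {
      forwardNullRow();   // LATERAL VIEW OUTER: emit one all-NULL row instead of nothing
    }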

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java Tue Jul  9 09:07:35 2013
@@ -121,6 +121,7 @@ public class UDFXPathUtil {
         throw new IOException("Stream closed");
     }
 
+    @Override
     public int read() throws IOException {
       ensureOpen();
       if (next >= length)
@@ -128,6 +129,7 @@ public class UDFXPathUtil {
       return str.charAt(next++);
     }
 
+    @Override
     public int read(char cbuf[], int off, int len) throws IOException {
       ensureOpen();
       if ((off < 0) || (off > cbuf.length) || (len < 0)
@@ -144,6 +146,7 @@ public class UDFXPathUtil {
       return n;
     }
 
+    @Override
     public long skip(long ns) throws IOException {
       ensureOpen();
       if (next >= length)
@@ -155,15 +158,18 @@ public class UDFXPathUtil {
       return n;
     }
 
+    @Override
     public boolean ready() throws IOException {
       ensureOpen();
       return true;
     }
 
+    @Override
     public boolean markSupported() {
       return true;
     }
 
+    @Override
     public void mark(int readAheadLimit) throws IOException {
       if (readAheadLimit < 0) {
         throw new IllegalArgumentException("Read-ahead limit < 0");
@@ -172,11 +178,13 @@ public class UDFXPathUtil {
       mark = next;
     }
 
+    @Override
     public void reset() throws IOException {
       ensureOpen();
       next = mark;
     }
 
+    @Override
     public void close() {
       str = null;
     }

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Tue Jul  9 09:07:35 2013
@@ -1013,7 +1013,6 @@ public class QTestUtil {
         ".*LOCATION '.*",
         ".*transient_lastDdlTime.*",
         ".*last_modified_.*",
-        ".*java.lang.RuntimeException.*",
         ".*at org.*",
         ".*at sun.*",
         ".*at java.*",

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Tue Jul  9 09:07:35 2013
@@ -62,8 +62,8 @@ public class TestExecDriver extends Test
 
   static HiveConf conf;
 
-  private static String tmpdir = "/tmp/" + System.getProperty("user.name")
-      + "/";
+  private static String tmpdir = System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name")
+      + File.separator;
   private static Path tmppath = new Path(tmpdir);
   private static Hive db;
   private static FileSystem fs;

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java Tue Jul  9 09:07:35 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 
@@ -87,7 +88,8 @@ public class TestPlan extends TestCase {
       // store into configuration
       JobConf job = new JobConf(TestPlan.class);
       job.set("fs.default.name", "file:///");
-      Utilities.setMapRedWork(job, mrwork,"/tmp/" + System.getProperty("user.name") + "/hive");
+      Utilities.setMapRedWork(job, mrwork, System.getProperty("java.io.tmpdir") + File.separator +
+        System.getProperty("user.name") + File.separator + "hive");
       MapredWork mrwork2 = Utilities.getMapRedWork(job);
       Utilities.clearMapRedWork(job);
 

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java Tue Jul  9 09:07:35 2013
@@ -20,8 +20,6 @@ package org.apache.hadoop.hive.ql.io;
 import java.io.IOException;
 import java.util.Random;
 
-import junit.framework.TestCase;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -34,12 +32,13 @@ import org.apache.hadoop.io.SequenceFile
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
+import static org.junit.Assert.*;
 
 /**
  * PerformTestRCFileAndSeqFile.
  *
  */
-public class PerformTestRCFileAndSeqFile extends TestCase {
+public class PerformTestRCFileAndSeqFile {
 
   private final Configuration conf = new Configuration();
 

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveInputOutputBuffer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveInputOutputBuffer.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveInputOutputBuffer.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveInputOutputBuffer.java Tue Jul  9 09:07:35 2013
@@ -17,7 +17,13 @@
  */
 package org.apache.hadoop.hive.ql.io;
 
+import static org.junit.Assert.assertArrayEquals;
+
+import java.io.DataOutput;
 import java.io.IOException;
+import java.util.Random;
+
+import org.junit.Test;
 
 import junit.framework.TestCase;
 
@@ -27,17 +33,196 @@ import junit.framework.TestCase;
  */
 public class TestHiveInputOutputBuffer extends TestCase {
 
+  private static final int numCases = 14; 
+  
+  private static final String asciiLine1 = "Foo 12345 moo";
+  private static final String asciiLine2 = "Line two";
+  private static final String asciiString = asciiLine1 + "\n" + asciiLine2 + "\r\n";
+
   public void testReadAndWrite() throws IOException {
     String testString = "test_hive_input_output_number_0";
     byte[] string_bytes = testString.getBytes();
     NonSyncDataInputBuffer inBuffer = new NonSyncDataInputBuffer();
     NonSyncDataOutputBuffer outBuffer = new NonSyncDataOutputBuffer();
-    outBuffer.write(string_bytes);
-    inBuffer.reset(outBuffer.getData(), 0, outBuffer.getLength());
-    byte[] readBytes = new byte[string_bytes.length];
-    inBuffer.read(readBytes);
-    String readString = new String(readBytes);
-    assertEquals("Field testReadAndWrite()", readString, testString);
+    try {
+      outBuffer.write(string_bytes);
+      inBuffer.reset(outBuffer.getData(), 0, outBuffer.getLength());
+      byte[] readBytes = new byte[string_bytes.length];
+      inBuffer.read(readBytes);
+      String readString = new String(readBytes);
+      assertEquals("Field testReadAndWrite()", readString, testString);
+    } finally {
+      inBuffer.close();
+      outBuffer.close();
+    }
   }
 
+  @SuppressWarnings("deprecation")
+  private static void readJunk(NonSyncDataInputBuffer in, Random r, long seed, int iter) 
+      throws IOException {
+    r.setSeed(seed);
+    for (int i = 0; i < iter; ++i) {
+      switch (r.nextInt(numCases)) {
+        case 0:
+          assertEquals((byte)(r.nextInt() & 0xFF), in.readByte()); break;
+        case 1:
+          assertEquals((short)(r.nextInt() & 0xFFFF), in.readShort()); break;
+        case 2:
+          assertEquals(r.nextInt(), in.readInt()); break;
+        case 3:
+          assertEquals(r.nextLong(), in.readLong()); break;
+        case 4:
+          assertEquals(Double.doubleToLongBits(r.nextDouble()),
+                       Double.doubleToLongBits(in.readDouble())); break;
+        case 5:
+          assertEquals(Float.floatToIntBits(r.nextFloat()),
+                       Float.floatToIntBits(in.readFloat())); break;
+        case 6:
+          int len = r.nextInt(1024);
+          // 1 (test #readFully(3)):
+          final byte[] vb = new byte[len];
+          r.nextBytes(vb);
+          final byte[] b = new byte[len];
+          in.readFully(b, 0, len);
+          assertArrayEquals(vb, b);
+          // 2 (test #read(3)):
+          r.nextBytes(vb);
+          in.read(b, 0, len);
+          assertArrayEquals(vb, b);
+          // 3 (test #readFully(1)):
+          r.nextBytes(vb);
+          in.readFully(b);
+          assertArrayEquals(vb, b);
+          break;
+        case 7:
+          assertEquals(r.nextBoolean(), in.readBoolean());
+          break;
+        case 8:
+          assertEquals((char)r.nextInt(), in.readChar());
+          break;
+        case 9:
+          int actualUB = in.readUnsignedByte();
+          assertTrue(actualUB >= 0);
+          assertTrue(actualUB <= 255);
+          assertEquals(r.nextInt() & 0xFF, actualUB);
+          break;
+        case 10:
+          int actualUS = in.readUnsignedShort();
+          assertTrue(actualUS >= 0);
+          assertTrue(actualUS <= 0xFFFF);
+          assertEquals(r.nextInt() & 0xFFFF, actualUS);
+          break;
+        case 11:
+          String expectedString1 = composeString(1024, r);
+          assertEquals(expectedString1, in.readUTF());
+          String expectedString2 = composeString(1024, r);
+          assertEquals(expectedString2, NonSyncDataInputBuffer.readUTF(in));
+          break;
+        case 12:
+          assertEquals(asciiLine1, in.readLine());
+          assertEquals(asciiLine2, in.readLine());
+          break;
+        case 13:
+          in.skipBytes(8);
+          r.nextLong(); // ignore
+          assertEquals(r.nextLong(), in.readLong());
+          break;
+      }
+    }
+  }
+  
+  private static void writeJunk(DataOutput out, Random r, long seed, int iter)
+      throws IOException  {
+    r.setSeed(seed);
+    for (int i = 0; i < iter; ++i) {
+      switch (r.nextInt(numCases)) {
+        case 0: out.writeByte(r.nextInt()); break;
+        case 1: out.writeShort((short)(r.nextInt() & 0xFFFF)); break;
+        case 2: out.writeInt(r.nextInt()); break;
+        case 3: out.writeLong(r.nextLong()); break;
+        case 4: out.writeDouble(r.nextDouble()); break;
+        case 5: out.writeFloat(r.nextFloat()); break;
+        case 6:
+          byte[] b = new byte[r.nextInt(1024)];
+          // 1:
+          r.nextBytes(b);
+          out.write(b);
+          // 2:
+          r.nextBytes(b);
+          out.write(b);
+          // 3:
+          r.nextBytes(b);
+          out.write(b);
+          break;
+        case 7:
+          out.writeBoolean(r.nextBoolean());
+          break;
+        case 8:
+          out.writeChar((char)r.nextInt());
+          break;
+        case 9:
+          out.writeByte((byte)r.nextInt());
+          break;
+        case 10:
+          out.writeShort((short)r.nextInt());
+          break;
+        case 11:
+          String string = composeString(1024, r);
+          out.writeUTF(string);
+          String string2 = composeString(1024, r);
+          out.writeUTF(string2);
+          break;
+        case 12:
+          byte[] bb = asciiString.getBytes("UTF-8");
+          out.write(bb);
+          break;
+        case 13:
+          out.writeLong(r.nextLong());
+          out.writeLong(r.nextLong());
+          break;
+      }
+    }
+  }
+
+  private static String composeString(int len, Random r) {
+    char[] cc = new char[len];
+    char ch;
+    for (int i = 0; i<len; i++) {
+      do {
+        ch = (char)r.nextInt();
+      } while (!Character.isDefined(ch) 
+          || Character.isHighSurrogate(ch)
+          || Character.isLowSurrogate(ch));
+      cc[i] = ch;
+    }
+    return new String(cc);
+  }
+  
+  /**
+   * Tests methods of {@link NonSyncDataInputBuffer}.
+   * @throws IOException
+   */
+  @Test
+  public void testBaseBuffers() throws IOException {
+    NonSyncDataOutputBuffer dob = new NonSyncDataOutputBuffer();
+    final Random r = new Random();
+    final long seed = 0x0123456789ABCDEFL; // hardcoded for reproducibility.
+    r.setSeed(seed);
+    System.out.println("SEED: " + seed);
+    
+    writeJunk(dob, r, seed, 1000);
+    NonSyncDataInputBuffer dib = new NonSyncDataInputBuffer();
+    dib.reset(dob.getData(), 0, dob.getLength());
+    assertEquals(0, dib.getPosition());
+    assertEquals(dob.getLength(), dib.getLength());
+    readJunk(dib, r, seed, 1000);
+
+    dob.reset();
+    writeJunk(dob, r, seed, 1000);
+    dib.reset(dob.getData(), dob.getLength());
+    assertEquals(0, dib.getPosition());
+    assertEquals(dob.getLength(), dib.getLength());
+    readJunk(dib, r, seed, 1000);
+  }
+  
 }
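
writeJunk and readJunk stay aligned by reseeding the same Random and replaying
an identical case sequence, so every value written is regenerated on the read
side for comparison. Note the detail in case 13: the reader skips the first of
the two longs instead of reading it, but still calls r.nextLong() once so the
generator stays in step with the writer.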

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java Tue Jul  9 09:07:35 2013
@@ -23,11 +23,13 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Properties;
 import java.util.Random;
 
 import junit.framework.TestCase;
+import static org.junit.Assert.*;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -219,6 +221,94 @@ public class TestRCFile extends TestCase
 
     reader.close();
   }
+  
+  /**
+   * Tests {@link RCFile.Reader#getColumn(int, BytesRefArrayWritable) } method.
+   * @throws IOException
+   */
+  public void testGetColumn() throws IOException {
+    fs.delete(file, true);
+
+    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
+    RCFile.Writer writer =
+      new RCFile.Writer(fs, conf, file, null,
+                        RCFile.createMetadata(new Text("apple"),
+                                              new Text("block"),
+                                              new Text("cat"),
+                                              new Text("dog")),
+                        new DefaultCodec());
+    
+    byte[][] record_1 = {
+        "123".getBytes("UTF-8"), 
+        "456".getBytes("UTF-8"),
+        "789".getBytes("UTF-8"), 
+        "1000".getBytes("UTF-8"),
+        "5.3".getBytes("UTF-8"), 
+        "hive and hadoop".getBytes("UTF-8"),
+        new byte[0], 
+        "NULL".getBytes("UTF-8") };
+    byte[][] record_2 = {
+        "100".getBytes("UTF-8"), 
+        "200".getBytes("UTF-8"),
+        "123".getBytes("UTF-8"), 
+        "1000".getBytes("UTF-8"),
+        "5.3".getBytes("UTF-8"), 
+        "hive and hadoop".getBytes("UTF-8"),
+        new byte[0], 
+        "NULL".getBytes("UTF-8")};
+    
+    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
+    for (int i = 0; i < record_1.length; i++) {
+      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0,
+          record_1[i].length);
+      bytes.set(i, cu);
+    }
+    writer.append(bytes);
+    bytes.clear();
+    for (int i = 0; i < record_2.length; i++) {
+      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0,
+          record_2[i].length);
+      bytes.set(i, cu);
+    }
+    writer.append(bytes);
+    writer.close();
+
+    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
+    
+    LongWritable rowID = new LongWritable();
+    assertTrue(reader.next(rowID));
+    assertEquals(rowID.get(), 0L);
+    
+    assertTrue(reader.next(rowID));
+    assertEquals(rowID.get(), 1L);
+    
+    BytesRefArrayWritable result = null;
+    BytesRefWritable brw;
+    for (int col=0; col < 8; col++) {
+      BytesRefArrayWritable result2 = reader.getColumn(col, result);
+      if (result == null) {
+        assertNotNull(result2);
+        result = result2;
+      } else {
+        // #getColumn(2) should return the instance passed in: 
+        assertSame(result2, result);
+      }
+      // each column has height of 2: 
+      assertEquals(2, result.size());
+      for (int row=0; row<result.size(); row++) {
+        brw = result.get(row);
+        int start = brw.getStart();
+        int len = brw.getLength();
+        byte[] actualData = Arrays.copyOfRange(brw.getData(), start, start + len);
+        byte[] expectedData = (row == 0) ? record_1[col] : record_2[col];
+        assertArrayEquals("col="+col+" : row="+row,  expectedData, actualData);
+      }
+      
+      result.clear();
+    }
+    
+    reader.close();
+  }
 
   public void testReadCorruptFile() throws IOException, SerDeException {
     fs.delete(file, true);

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java Tue Jul  9 09:07:35 2013
@@ -20,9 +20,6 @@ package org.apache.hadoop.hive.ql.io;
 import java.io.File;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
-import java.io.Serializable;
-import java.net.URL;
-import java.net.URLClassLoader;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -35,20 +32,12 @@ import org.apache.hadoop.fs.ContentSumma
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStore;
-import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
-import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -184,6 +173,9 @@ public class TestSymlinkTextInputFormat 
       
       CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(
           CombineHiveInputFormat.class, newJob);
+      
+      combineInputFormat.validateInput(newJob);
+      
       InputSplit[] retSplits = combineInputFormat.getSplits(newJob, 1);
       assertEquals(1, retSplits.length);
     } catch (Exception e) {

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java Tue Jul  9 09:07:35 2013
@@ -43,7 +43,7 @@ public class TestFileDump {
   Path workDir = new Path(System.getProperty("test.tmp.dir",
       "target" + File.separator + "test" + File.separator + "tmp"));
   Path resourceDir = new Path(System.getProperty("test.build.resources",
-      "src" + File.separator + "test" + File.separator + "resources"));
+      "ql" + File.separator + "src" + File.separator + "test" + File.separator + "resources"));
 
   Configuration conf;
   FileSystem fs;

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestEmbeddedLockManager.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestEmbeddedLockManager.java?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestEmbeddedLockManager.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestEmbeddedLockManager.java Tue Jul  9 09:07:35 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.lockmgr;
 
 import junit.framework.TestCase;
+
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
 import org.junit.Assert;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/combine2_win.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/combine2_win.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/combine2_win.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/combine2_win.q Tue Jul  9 09:07:35 2013
@@ -11,6 +11,8 @@ set hive.merge.smallfiles.avgsize=0;
 -- INCLUDE_OS_WINDOWS
 -- included only on  windows because of difference in file name encoding logic
 
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+
 create table combine2(key string) partitioned by (value string);
 
 insert overwrite table combine2 partition(value) 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/ctas_colname.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/ctas_colname.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/ctas_colname.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/ctas_colname.q Tue Jul  9 09:07:35 2013
@@ -6,43 +6,43 @@ explain
 create table summary as select *, sum(key), count(value) from src;
 create table summary as select *, sum(key), count(value) from src;
 describe formatted summary;
-select * from summary;
+select * from summary order by `_col0`, `_col1`, `_c1`, `_c2`;
 
 -- window functions
 explain
 create table x4 as select *, rank() over(partition by key order by value) as rr from src1;
 create table x4 as select *, rank() over(partition by key order by value) as rr from src1;
 describe formatted x4;
-select * from x4;
+select * from x4 order by key, value, rr;
 
 explain
 create table x5 as select *, lead(key,1) over(partition by key order by value) from src limit 20;
 create table x5 as select *, lead(key,1) over(partition by key order by value) from src limit 20;
 describe formatted x5;
-select * from x5;
+select * from x5 order by key, value, tok_windowspec;
 
 -- sub queries
 explain
 create table x6 as select * from (select *, max(key) from src1) a;
 create table x6 as select * from (select *, max(key) from src1) a;
 describe formatted x6;
-select * from x6;
+select * from x6 order by `_col0`, `_c1`;
 
 explain
 create table x7 as select * from (select * from src group by key) a;
 create table x7 as select * from (select * from src group by key) a;
 describe formatted x7;
-select * from x7;
+select * from x7 order by `_col0`;
 
 explain
 create table x8 as select * from (select * from src group by key having key < 9) a;
 create table x8 as select * from (select * from src group by key having key < 9) a;
 describe formatted x8;
-select * from x8;
+select * from x8 order by `_col0`;
 
 explain
 create table x9 as select * from (select max(value),key from src group by key having key < 9 AND max(value) IS NOT NULL) a;
 create table x9 as select * from (select max(value),key from src group by key having key < 9 AND max(value) IS NOT NULL) a;
 describe formatted x9;
-select * from x9;
+select * from x9 order by key, `_c0`;
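
The same pattern recurs through the remaining .q updates below: an explicit
ORDER BY, using the generated `_colN`/`_cN` names where CTAS produced them, so
the golden-file output no longer depends on file or reducer ordering.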
 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_complex_types.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_complex_types.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_complex_types.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_complex_types.q Tue Jul  9 09:07:35 2013
@@ -1,16 +1,19 @@
-
 CREATE TABLE DEST1(key ARRAY<STRING>, value BIGINT) STORED AS TEXTFILE;
 CREATE TABLE DEST2(key MAP<STRING, STRING>, value BIGINT) STORED AS TEXTFILE;
+CREATE TABLE DEST3(key STRUCT<col1:STRING, col2:STRING>, value BIGINT) STORED AS TEXTFILE;
 
 EXPLAIN
 FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value);
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value)
+INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value);
 
 FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
-INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value);
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value)
+INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value);
 
 SELECT DEST1.* FROM DEST1;
 SELECT DEST2.* FROM DEST2;
+SELECT DEST3.* FROM DEST3;
 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q Tue Jul  9 09:07:35 2013
@@ -12,13 +12,13 @@ SELECT * FROM
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
 join
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
-on subq1.a = subq2.a;
+on subq1.a = subq2.a order by subq1.a, subq1.b, subq2.a, subq2.b;
 
 SELECT * FROM
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
 join
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
-on subq1.a = subq2.a;
+on subq1.a = subq2.a order by subq1.a, subq1.b, subq2.a, subq2.b;
 
 set hive.new.job.grouping.set.cardinality=2;
 
@@ -29,11 +29,11 @@ SELECT * FROM
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
 join
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
-on subq1.a = subq2.a;
+on subq1.a = subq2.a order by subq1.a, subq1.b, subq2.a, subq2.b;
 
 SELECT * FROM
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
 join
 (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
-on subq1.a = subq2.a;
+on subq1.a = subq2.a order by subq1.a, subq1.b, subq2.a, subq2.b;
 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/join32_lessSize.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/join32_lessSize.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/join32_lessSize.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/join32_lessSize.q Tue Jul  9 09:07:35 2013
@@ -18,7 +18,7 @@ SELECT x.key, z.value, y.value
 FROM src1 x JOIN src y ON (x.key = y.key) 
 JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
 
-select * from dest_j1 x order by x.key;
+select * from dest_j1 x order by x.value;
 
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE dest_j1
@@ -33,7 +33,7 @@ FROM src w JOIN src1 x ON (x.value = w.v
 JOIN src y ON (x.key = y.key) 
 JOIN src1 z ON (x.key = z.key);
 
-select * from dest_j1 x order by x.key;
+select * from dest_j1 x order by x.value;
 
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE dest_j2
@@ -46,7 +46,7 @@ SELECT res.key, z.value, res.value
 FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res 
 JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11);
 
-select * from dest_j2 x order by x.key;
+select * from dest_j2 x order by x.value;
 
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE dest_j2
@@ -59,7 +59,7 @@ SELECT res.key, z.value, res.value
 FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res 
 JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11);
 
-select * from dest_j2 x order by x.key;
+select * from dest_j2 x order by x.value;
 
 EXPLAIN
 INSERT OVERWRITE TABLE dest_j2
@@ -72,7 +72,7 @@ SELECT res.key, x.value, res.value  
 FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res 
 JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11);
 
-select * from dest_j2 x order by x.key;
+select * from dest_j2 x order by x.value;
 
 EXPLAIN
 INSERT OVERWRITE TABLE dest_j2
@@ -85,4 +85,4 @@ SELECT res.key, y.value, res.value
 FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res 
 JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11);
 
-select * from dest_j2 x order by x.key;
+select * from dest_j2 x order by x.value;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_12.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_12.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_12.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_12.q Tue Jul  9 09:07:35 2013
@@ -29,14 +29,14 @@ desc formatted list_bucketing_mul_col pa
 set hive.optimize.listbucketing=true;
 explain extended
 select * from list_bucketing_mul_col 
-where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466";
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" ORDER BY col2, col4, ds, hr;
 select * from list_bucketing_mul_col 
-where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466";
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" ORDER BY col2, col4, ds, hr;
 
 explain extended
 select * from list_bucketing_mul_col 
-where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382";
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" ORDER BY col2, col4, ds, hr;
 select * from list_bucketing_mul_col 
-where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382";
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" ORDER BY col2, col4, ds, hr;
 
 drop table list_bucketing_mul_col;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_13.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_13.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_13.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_13.q Tue Jul  9 09:07:35 2013
@@ -29,8 +29,8 @@ desc formatted list_bucketing_mul_col pa
 set hive.optimize.listbucketing=true;
 explain extended
 select * from list_bucketing_mul_col 
-where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466";
+where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" ORDER BY col2, col4, ds, hr;
 select * from list_bucketing_mul_col 
-where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466";
+where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" ORDER BY col2, col4, ds, hr;
 
 drop table list_bucketing_mul_col;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_2.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_2.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_2.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_2.q Tue Jul  9 09:07:35 2013
@@ -46,16 +46,16 @@ select count(*) from list_bucketing_stat
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.optimize.listbucketing=true;
 explain extended
-select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
-select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
-select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' ORDER BY key, value;
 
 -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none
 -- but query should succeed for 51 or 51 and val_14
-select * from srcpart where ds = '2008-04-08' and key = '51';
-select * from list_bucketing_static_part where key = '51';
-select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14';
-select * from list_bucketing_static_part where key = '51' and value = 'val_14';
+select * from srcpart where ds = '2008-04-08' and key = '51' ORDER BY key, value;
+select * from list_bucketing_static_part where key = '51' ORDER BY key, value, ds, hr;
+select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14'  ORDER BY key, value;
+select * from list_bucketing_static_part where key = '51' and value = 'val_14' ORDER BY key, value, ds, hr;
 
 -- queries with < <= > >= should work for skewed test although we don't benefit from pruning
 select count(1) from srcpart where ds = '2008-04-08' and key < '51';

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_4.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_4.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_4.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_4.q Tue Jul  9 09:07:35 2013
@@ -63,9 +63,9 @@ select count(*) from list_bucketing_stat
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.optimize.listbucketing=true;
 explain extended
-select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
-select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
-select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' ORDER BY key, value;
 
 -- clean up
 drop table list_bucketing_static_part;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_5.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_5.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_5.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_5.q Tue Jul  9 09:07:35 2013
@@ -28,11 +28,11 @@ desc formatted list_bucketing_dynamic_pa
 select count(1) from srcpart where ds='2008-04-08';
 select count(1) from list_bucketing_dynamic_part where ds='2008-04-08';
 
-select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103";
+select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" ORDER BY key, value;
 set hive.optimize.listbucketing=true;
 explain extended
-select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103";
-select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103";
+select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" ORDER BY key, value, ds, hr;
+select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" ORDER BY key, value, ds, hr;
 
 -- clean up resources
 drop table list_bucketing_dynamic_part;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_9.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_9.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_9.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_dml_9.q Tue Jul  9 09:07:35 2013
@@ -63,9 +63,9 @@ select count(*) from list_bucketing_stat
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.optimize.listbucketing=true;
 explain extended
-select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
-select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484';
-select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
+select * from list_bucketing_static_part where ds = '2008-04-08' and  hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr;
 
 -- clean up
 drop table list_bucketing_static_part;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_1.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_1.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_1.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_1.q Tue Jul  9 09:07:35 2013
@@ -44,7 +44,7 @@ LOCATION '${hiveconf:hive.metastore.ware
 alter table fact_daily PARTITION (ds = '1') set skewed location (484='${hiveconf:hive.metastore.warehouse.dir}/fact_tz/ds=1/x=484','HIVE_DEFAULT_LIST_BUCKETING_KEY'='${hiveconf:hive.metastore.warehouse.dir}/fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME');
 describe formatted fact_daily PARTITION (ds = '1');
 	
-SELECT * FROM fact_daily WHERE ds='1';	
+SELECT * FROM fact_daily WHERE ds='1' ORDER BY x;	
 
 -- pruner only pick up skewed-value directory
 -- explain plan shows which directory selected: Truncated Path -> Alias

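For context, these one-skew tests depend on a table skewed on a single value and stored as directories, so that rows carrying the skewed value land in their own subdirectory for the pruner to select. A minimal DDL sketch (table and column names hypothetical, mirroring the fact_daily layout used in the tests):

    -- hypothetical table; list bucketing on the single skewed value 484
    CREATE TABLE fact_sketch (x INT, y STRING)
    PARTITIONED BY (ds STRING)
    SKEWED BY (x) ON (484)
    STORED AS DIRECTORIES;
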
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_2.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_2.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_2.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_2.q Tue Jul  9 09:07:35 2013
@@ -45,7 +45,7 @@ LOCATION '${hiveconf:hive.metastore.ware
 alter table fact_daily PARTITION (ds = '1') set skewed location (484='${hiveconf:hive.metastore.warehouse.dir}/fact_tz/ds=1/x=484','HIVE_DEFAULT_LIST_BUCKETING_KEY'='${hiveconf:hive.metastore.warehouse.dir}/fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME');
 describe formatted fact_daily PARTITION (ds = '1');
 	
-SELECT * FROM fact_daily WHERE ds='1';	
+SELECT * FROM fact_daily WHERE ds='1' ORDER BY x, y;
 
 -- The first subquery
 -- explain plan shows which directory selected: Truncated Path -> Alias
@@ -55,9 +55,9 @@ select x from (select * from fact_daily 
 
 -- The second subquery
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484;
+explain extended select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 ORDER BY x1, y1;
 -- List Bucketing Query
-select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484;
+select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 ORDER BY x1, y1;
 
 
 -- The third subquery

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_3.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_3.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_3.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/list_bucket_query_oneskew_3.q Tue Jul  9 09:07:35 2013
@@ -52,10 +52,10 @@ alter table fact_daily PARTITION (ds = '
 'HIVE_DEFAULT_LIST_BUCKETING_KEY'='${hiveconf:hive.metastore.warehouse.dir}/fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME');
 describe formatted fact_daily PARTITION (ds = '1');
 	
-SELECT * FROM fact_daily WHERE ds='1';	
+SELECT * FROM fact_daily WHERE ds='1' ORDER BY x, y, z;	
 
 -- pruner  pick up right directory
 -- explain plan shows which directory selected: Truncated Path -> Alias
-explain extended SELECT x FROM fact_daily WHERE ds='1' and not (x = 86);
+explain extended SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) ORDER BY x;
 -- List Bucketing Query
-SELECT x,y,z FROM fact_daily WHERE ds='1' and not (x = 86);
+SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) ORDER BY x;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q Tue Jul  9 09:07:35 2013
@@ -18,8 +18,8 @@ from src_10
 insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C
 insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C;
 
-select * from src_lv1;
-select * from src_lv2;
+select * from src_lv1 order by key, value;
+select * from src_lv2 order by key, value;
 
 -- 2(LV+GBY)
 -- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16]
@@ -35,8 +35,8 @@ from src_10
 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key
 insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key;
 
-select * from src_lv1;
-select * from src_lv2;
+select * from src_lv1 order by key, value;
+select * from src_lv2 order by key, value;
 
 -- (LV+GBY) + RS:2GBY
 -- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[6]-GBY[7]-RS[8]-GBY[9]-SEL[10]-FS[11]
@@ -54,9 +54,9 @@ insert overwrite table src_lv1 select ke
 insert overwrite table src_lv2 select key, count(value) where key > 200 group by key
 insert overwrite table src_lv3 select key, count(value) where key < 200 group by key;
 
-select * from src_lv1;
-select * from src_lv2;
-select * from src_lv3;
+select * from src_lv1 order by key, value;
+select * from src_lv2 order by key, value;
+select * from src_lv3 order by key, value;
 
 -- todo: shared distinct columns (should work with hive.optimize.multigroupby.common.distincts)
 -- 2(LV+GBY) + RS:2GBY
@@ -76,9 +76,9 @@ insert overwrite table src_lv1 select C,
 insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C
 insert overwrite table src_lv3 select value, sum(distinct key) group by value;
 
-select * from src_lv1;
-select * from src_lv2;
-select * from src_lv3;
+select * from src_lv1 order by key, value;
+select * from src_lv2 order by key, value;
+select * from src_lv3 order by key, value;
 
 create table src_lv4 (key string, value string);
 
@@ -96,7 +96,7 @@ insert overwrite table src_lv2 select ke
 insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value
 insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value;
 
-select * from src_lv1;
-select * from src_lv2;
-select * from src_lv3;
-select * from src_lv4;
+select * from src_lv1 order by key, value;
+select * from src_lv2 order by key, value;
+select * from src_lv3 order by key, value;
+select * from src_lv4 order by key, value;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_createas1.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_createas1.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_createas1.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_createas1.q Tue Jul  9 09:07:35 2013
@@ -20,7 +20,7 @@ CREATE TABLE orc_createas1b
     STORED AS ORC AS
     SELECT * FROM src;
 
-EXPLAIN SELECT * FROM orc_createas1b LIMIT 5;
+EXPLAIN SELECT * FROM orc_createas1b ORDER BY key LIMIT 5;
 
 SELECT * FROM orc_createas1b ORDER BY key LIMIT 5;
 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_diff_part_cols.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_diff_part_cols.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_diff_part_cols.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/orc_diff_part_cols.q Tue Jul  9 09:07:35 2013
@@ -16,4 +16,4 @@ ALTER TABLE test_orc ADD COLUMNS (cnt IN
 
 INSERT OVERWRITE TABLE test_orc PARTITION (part = '2') SELECT key, count(*) FROM src GROUP BY key LIMIT 5;
 
-SELECT * FROM test_orc; 
+SELECT * FROM test_orc ORDER BY key; 

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/pcr.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/pcr.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/pcr.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/pcr.q Tue Jul  9 09:07:35 2013
@@ -102,3 +102,40 @@ select key, value, ds, hr from srcpart w
 drop table pcr_t1;
 drop table pcr_t2;
 drop table pcr_t3;
+
+
+-- Test cases where a non-boolean ds expression has the same value for all partitions, and then different values across partitions:
+drop table pcr_foo;
+create table pcr_foo (key int, value string) partitioned by (ds int);
+insert overwrite table pcr_foo partition (ds=3) select * from src where key < 10 order by key;
+insert overwrite table pcr_foo partition (ds=5) select * from src where key < 10 order by key;
+insert overwrite table pcr_foo partition (ds=7) select * from src where key < 10 order by key;
+
+-- the condition is 'true' for all 3 partitions (ds=3,5,7):
+select key, value, ds from pcr_foo where (ds % 2 == 1);
+
+-- the condition is 'true' for partitions (ds=3,5) but 'false' for partition ds=7:
+select key, value, ds from pcr_foo where (ds / 3 < 2);
+
+drop table pcr_foo;
+
+
+
+-- Cover org.apache.hadoop.hive.ql.optimizer.pcr.PcrExprProcFactory.FieldExprProcessor.
+-- Create a table with struct data:
+create table ab(strct struct<a:int, b:string>)
+row format delimited
+  fields terminated by '\t'
+  collection items terminated by '\001';
+load data local inpath '../data/files/kv1.txt'
+overwrite into table ab;
+
+-- Create a partitioned table with struct data:
+drop table foo_field;
+create table foo_field (s struct<a:int,b:string>) partitioned by (ds int);
+insert overwrite table foo_field partition (ds=5) select strct from ab where strct.a < 10 limit 2;
+insert overwrite table foo_field partition (ds=7) select strct from ab where strct.a > 190 limit 2;
+select s,ds from foo_field where ((ds + s.a) > 0) order by ds,s;
+
+drop table foo_field;
+

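For context, the new pcr.q cases target the partition condition remover: a predicate that references only partition columns can be evaluated per partition at compile time, and if it holds for every surviving partition the Filter operator can be dropped from the plan. A minimal sketch against the pcr_foo table created above (expected plan behavior paraphrased from the test comments, not actual EXPLAIN output):

    EXPLAIN
    SELECT key, value, ds FROM pcr_foo WHERE (ds % 2 == 1);
    -- all of ds=3,5,7 satisfy the predicate, so the residual filter should disappear
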
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q Tue Jul  9 09:07:35 2013
@@ -2,12 +2,21 @@ set hive.optimize.reducededuplication=tr
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.map.aggr=true;
 
+-- HIVE-2340 deduplicate RS followed by RS
+-- hive.optimize.reducededuplication : whether to use this optimization
+-- hive.optimize.reducededuplication.min.reducer : the deduplicated RS must have at least this many reducers
+
+-- RS-mGBY-RS-rGBY
 explain select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
 explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
 explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1);
+-- mGBY-RS-rGBY-RS
 explain select key, sum(key) as value from src group by key order by key, value;
+-- RS-JOIN-mGBY-RS-rGBY
 explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
+-- RS-JOIN-RS
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
+-- mGBY-RS-rGBY-mGBY-RS-rGBY
 explain from (select key, value from src group by key, value) s select s.key group by s.key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
@@ -20,12 +29,17 @@ from (select key, value from src group b
 
 set hive.map.aggr=false;
 
+-- RS-RS-GBY
 explain select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
 explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
 explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1);
+-- RS-GBY-RS
 explain select key, sum(key) as value from src group by key order by key, value;
+-- RS-JOIN-RS-GBY
 explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
+-- RS-JOIN-RS
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
+-- RS-GBY-RS-GBY
 explain from (select key, value from src group by key, value) s select s.key group by s.key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
@@ -34,4 +48,4 @@ select key, sum(key), (X + 1) from (sele
 select key, sum(key) as value from src group by key order by key, value;
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
-from (select key, value from src group by key, value) s select s.key group by s.key;
\ No newline at end of file
+from (select key, value from src group by key, value) s select s.key group by s.key;

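For context, HIVE-2340 merges adjacent ReduceSink (RS) operators that shuffle on compatible keys. A minimal sketch of the pattern using the standard src test table: the inner DISTRIBUTE BY/SORT BY emits one ReduceSink on key, and the outer GROUP BY would emit a second one on the same key; with the optimization enabled, the two collapse into a single shuffle:

    set hive.optimize.reducededuplication=true;
    explain
    select key, sum(key)
    from (select * from src distribute by key sort by key) q
    group by key;
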
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column.q Tue Jul  9 09:07:35 2013
@@ -9,7 +9,7 @@ INSERT OVERWRITE TABLE test_tab SELECT *
 
 DESC FORMATTED test_tab;
 
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Truncate 1 column
 TRUNCATE TABLE test_tab COLUMNS (key);
@@ -17,7 +17,7 @@ TRUNCATE TABLE test_tab COLUMNS (key);
 DESC FORMATTED test_tab;
 
 -- First column should be null
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Truncate multiple columns
 INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10;
@@ -27,7 +27,7 @@ TRUNCATE TABLE test_tab COLUMNS (key, va
 DESC FORMATTED test_tab;
 
 -- Both columns should be null
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Truncate columns again
 TRUNCATE TABLE test_tab COLUMNS (key, value);
@@ -35,7 +35,7 @@ TRUNCATE TABLE test_tab COLUMNS (key, va
 DESC FORMATTED test_tab;
 
 -- Both columns should be null
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Test truncating with a binary serde
 ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
@@ -44,7 +44,7 @@ INSERT OVERWRITE TABLE test_tab SELECT *
 
 DESC FORMATTED test_tab;
 
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Truncate 1 column
 TRUNCATE TABLE test_tab COLUMNS (key);
@@ -52,7 +52,7 @@ TRUNCATE TABLE test_tab COLUMNS (key);
 DESC FORMATTED test_tab;
 
 -- First column should be null
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Truncate 2 columns
 TRUNCATE TABLE test_tab COLUMNS (key, value);
@@ -60,7 +60,7 @@ TRUNCATE TABLE test_tab COLUMNS (key, va
 DESC FORMATTED test_tab;
 
 -- Both columns should be null
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- Test truncating a partition
 CREATE TABLE test_tab_part (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE;
@@ -69,11 +69,11 @@ INSERT OVERWRITE TABLE test_tab_part PAR
 
 DESC FORMATTED test_tab_part PARTITION (part = '1');
 
-SELECT * FROM test_tab_part WHERE part = '1';
+SELECT * FROM test_tab_part WHERE part = '1' ORDER BY value;
 
 TRUNCATE TABLE test_tab_part PARTITION (part = '1') COLUMNS (key);
 
 DESC FORMATTED test_tab_part PARTITION (part = '1');
 
 -- First column should be null
-SELECT * FROM test_tab_part WHERE part = '1';
+SELECT * FROM test_tab_part WHERE part = '1' ORDER BY value;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column_merge.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column_merge.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column_merge.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/truncate_column_merge.q Tue Jul  9 09:07:35 2013
@@ -14,7 +14,7 @@ TRUNCATE TABLE test_tab COLUMNS (key);
 ALTER TABLE test_tab CONCATENATE;
 
 -- The first column (key) should be null for all 10 rows
-SELECT * FROM test_tab;
+SELECT * FROM test_tab ORDER BY value;
 
 -- The value should be 1 indicating the table has 1 file
 SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM test_tab;

Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/udf_inline.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/udf_inline.q?rev=1501145&r1=1501144&r2=1501145&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/udf_inline.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/udf_inline.q Tue Jul  9 09:07:35 2013
@@ -16,3 +16,11 @@ SELECT inline( 
   )
 )  as (id, text) FROM SRC limit 2;
 
+-- HIVE-3475 INLINE UDTF doesn't convert types properly
+select * from (SELECT
+  ARRAY(
+    STRUCT (1,'dude!'),
+    STRUCT (2,'Wheres'),
+    STRUCT (3,'my car?')
+  ) as value FROM SRC limit 1) input
+ LATERAL VIEW inline(value) myTable AS id, text;

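For context, inline() takes an array of structs and emits one output row per struct, with one column per field; HIVE-3475 covers the case where the struct fields need type conversion. A minimal sketch mirroring the existing test above (standard src table; LIMIT caps the UDTF output):

    -- emits rows (1,'a') and (2,'b') per input row; LIMIT keeps the output to two rows
    SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) AS (id, text)
    FROM src LIMIT 2;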

