hive-commits mailing list archives

From eh...@apache.org
Subject svn commit: r1558987 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/optimizer/physical/ test/org/apache/hadoop/hive/ql/opti...
Date Fri, 17 Jan 2014 02:08:47 GMT
Author: ehans
Date: Fri Jan 17 02:08:46 2014
New Revision: 1558987

URL: http://svn.apache.org/r1558987
Log:
HIVE-5595: Implement vectorized SMB JOIN (Remus Rusanu via Eric Hanson)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
    hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Fri Jan 17 02:08:46 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -121,6 +122,7 @@ public final class OperatorFactory {
     vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class));
     vectorOpvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, VectorGroupByOperator.class));
     vectorOpvec.add(new OpTuple<MapJoinDesc>(MapJoinDesc.class, VectorMapJoinOperator.class));
+    vectorOpvec.add(new OpTuple<SMBJoinDesc>(SMBJoinDesc.class, VectorSMBMapJoinOperator.class));
     vectorOpvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class,
         VectorReduceSinkOperator.class));
     vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));

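The new vectorOpvec entry above is what lets OperatorFactory swap in the vectorized operator: each OpTuple maps a descriptor class to the operator class that executes it, and the factory instantiates the registered class reflectively when it encounters a matching descriptor. Below is a minimal, self-contained Java sketch of that registry pattern; every class name in it is an illustrative stand-in, not one of Hive's own types.

    import java.util.ArrayList;
    import java.util.List;

    public class RegistrySketch {
      interface Desc {}                                 // stand-in for OperatorDesc
      interface Op {}                                   // stand-in for Operator

      // pairs a descriptor class with the operator class that executes it
      static class OpTuple {
        final Class<? extends Desc> descClass;
        final Class<? extends Op> opClass;
        OpTuple(Class<? extends Desc> d, Class<? extends Op> o) { descClass = d; opClass = o; }
      }

      static class SMBJoinDescSketch implements Desc {}
      static class VectorSMBJoinOpSketch implements Op {}

      static final List<OpTuple> vectorOpvec = new ArrayList<OpTuple>();
      static {
        // mirrors the vectorOpvec.add(...) registration in the hunk above
        vectorOpvec.add(new OpTuple(SMBJoinDescSketch.class, VectorSMBJoinOpSketch.class));
      }

      // finds the operator class registered for the descriptor's runtime type
      // and instantiates it reflectively
      static Op getVectorOperator(Desc desc) throws Exception {
        for (OpTuple t : vectorOpvec) {
          if (t.descClass == desc.getClass()) {
            return t.opClass.getDeclaredConstructor().newInstance();
          }
        }
        throw new IllegalArgumentException("no vectorized operator for " + desc.getClass());
      }

      public static void main(String[] args) throws Exception {
        System.out.println(getVectorOperator(new SMBJoinDescSketch()).getClass().getSimpleName());
      }
    }
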
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Fri Jan 17 02:08:46 2014
@@ -226,6 +226,11 @@ public class SMBMapJoinOperator extends 
   public void cleanUpInputFileChangedOp() throws HiveException {
     inputFileChanged = true;
   }
+  
+  protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
+    return JoinUtil.computeKeys(row, joinKeys[alias],
+          joinKeysObjectInspectors[alias]);
+  }
 
   @Override
   public void processOp(Object row, int tag) throws HiveException {
@@ -260,8 +265,8 @@ public class SMBMapJoinOperator extends 
     byte alias = (byte) tag;
 
     // compute keys and values as StandardObjects
-    ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys[alias],
-        joinKeysObjectInspectors[alias]);
+    List<Object> key = smbJoinComputeKeys(row, alias);
+
     List<Object> value = getFilteredValue(alias, row);
 
 
@@ -495,7 +500,7 @@ public class SMBMapJoinOperator extends 
     return smallestOne == null ? null : result;
   }
 
-  private boolean processKey(byte alias, ArrayList<Object> key)
+  private boolean processKey(byte alias, List<Object> key)
       throws HiveException {
     List<Object> keyWritable = keyWritables[alias];
     if (keyWritable == null) {

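The hunks above are a template-method extraction: key computation moves out of processOp into a protected smbJoinComputeKeys hook, and processKey is loosened from ArrayList to List, so that a subclass can substitute its own key source without touching the join loop. Below is a minimal sketch of the pattern under hypothetical names; the vectorized subclass added later in this commit overrides the hook in exactly this way.

    import java.util.Arrays;
    import java.util.List;

    // Row-mode parent: the join loop routes key computation through a protected hook.
    class RowModeJoinSketch {
      protected List<Object> smbJoinComputeKeys(Object row, byte alias) {
        return Arrays.asList((Object) ("row-key:" + row));  // key derived from the row itself
      }
      public void processOp(Object row, byte alias) {
        List<Object> key = smbJoinComputeKeys(row, alias);
        System.out.println("joining alias " + alias + " on " + key);
      }
    }

    // Vectorized child: for the big table, keys were already evaluated for the
    // whole batch, so the hook just indexes into them.
    class VectorJoinSketch extends RowModeJoinSketch {
      private final Object[] batchKeys = { "k0", "k1", "k2" };
      int batchIndex;

      @Override
      protected List<Object> smbJoinComputeKeys(Object row, byte alias) {
        return Arrays.asList(batchKeys[batchIndex]);
      }

      public static void main(String[] args) {
        VectorJoinSketch j = new VectorJoinSketch();
        for (j.batchIndex = 0; j.batchIndex < 3; ++j.batchIndex) {
          j.processOp("batch", (byte) 0);               // row-mode loop, vector keys
        }
      }
    }
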
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Jan 17 02:08:46 2014
@@ -3192,10 +3192,24 @@ public final class Utilities {
     }
   }
 
-  public static void clearWorkMap() {
+  /**
+   * Returns true if a plan is both configured for vectorized execution
+   * and vectorization is allowed. A plan may be configured for vectorization
+   * but have vectorization disallowed, e.g. for FetchOperator execution.
+   */
+  public static boolean isVectorMode(Configuration conf) {
+    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) &&
+        Utilities.getPlanPath(conf) != null && Utilities
+        .getMapRedWork(conf).getMapWork().getVectorMode()) {
+      return true;
+    }
+    return false;
+  }
+  
+  public static void clearWorkMap() {
     gWorkMap.clear();
   }
-
+  
   /**
    * Create a temp dir in specified baseDir
    * This can go away once hive moves to support only JDK 7

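Utilities.isVectorMode centralizes a two-part check: vectorization must be enabled in the configuration, and the compiled MapWork must actually be marked for vector mode. The OrcInputFormat change below becomes a one-line delegation to it. Here is a self-contained sketch of the check and a typical caller; the boolean parameters and reader types are hypothetical simplifications of Configuration and the real record readers.

    public class VectorModeSketch {
      interface Reader {}
      static class RowReader implements Reader {}
      static class BatchReader implements Reader {}

      // stand-in for Utilities.isVectorMode(Configuration): true only when
      // vectorization is enabled in the config AND the compiled plan was
      // actually marked for vector mode
      static boolean isVectorMode(boolean enabledInConf, boolean planIsVectorized) {
        return enabledInConf && planIsVectorized;
      }

      // how an input format might branch on it (cf. the OrcInputFormat hunk below)
      static Reader createReader(boolean enabledInConf, boolean planIsVectorized) {
        return isVectorMode(enabledInConf, planIsVectorized)
            ? new BatchReader() : new RowReader();
      }

      public static void main(String[] args) {
        System.out.println(createReader(true, false).getClass().getSimpleName()); // RowReader
      }
    }
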
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java?rev=1558987&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java Fri Jan 17 02:08:46 2014
@@ -0,0 +1,313 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * VectorSMBMapJoinOperator.
+ * Implements the vectorized SMB join operator. The implementation relies on the row-mode SMB join operator.
+ * It accepts a vectorized batch input from the big table and iterates over the batch, calling the parent row-mode
+ * implementation for each row in the batch.
+ */
+public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements VectorizationContextRegion {
+
+  private static final Log LOG = LogFactory.getLog(
+      VectorSMBMapJoinOperator.class.getName());  
+  
+  private static final long serialVersionUID = 1L;
+
+  private int tagLen;
+  
+  private transient VectorizedRowBatch outputBatch;  
+  private transient VectorizationContext vOutContext = null;
+  private transient VectorizedRowBatchCtx vrbCtx = null;  
+  
+  private String fileKey;
+
+  private VectorExpression[] bigTableValueExpressions;
+
+  private VectorExpression[] bigTableFilterExpressions;
+
+  private VectorExpression[] keyExpressions;
+
+  private VectorExpressionWriter[] keyOutputWriters;
+
+  private transient VectorHashKeyWrapperBatch keyWrapperBatch;
+
+  private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
+
+  private transient int batchIndex = -1;
+
+  private transient VectorHashKeyWrapper[] keyValues;
+
+  private transient SMBJoinKeyEvaluator keyEvaluator;
+  
+  private transient VectorExpressionWriter[] valueWriters;
+  
+  private interface SMBJoinKeyEvaluator {
+    List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException;
+  }
+
+  public VectorSMBMapJoinOperator() {
+    super();
+  }
+  
+  public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf)
+      throws HiveException {
+    this();
+    SMBJoinDesc desc = (SMBJoinDesc) conf;
+    this.conf = desc;
+    
+    order = desc.getTagOrder();
+    numAliases = desc.getExprs().size();
+    posBigTable = (byte) desc.getPosBigTable();
+    filterMaps = desc.getFilterMap();
+    tagLen = desc.getTagLength();
+    noOuterJoin = desc.isNoOuterJoin();
+
+    // Must obtain vectorized equivalents for filter and value expressions
+
+    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
+    bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
+        VectorExpressionDescriptor.Mode.FILTER);
+
+    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
+    keyExpressions = vContext.getVectorExpressions(keyDesc);
+    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
+
+    Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
+    bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
+    
+    // Vectorized join operators need to create a new vectorization region for child operators.
+
+    List<String> outColNames = desc.getOutputColumnNames();
+    
+    Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
+    
+    int outColIndex = 0;
+    for(String outCol: outColNames) {
+      mapOutCols.put(outCol,  outColIndex++);
+    }
+
+    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+    vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
+    this.fileKey = vOutContext.getFileKey();
+  }
+  
+  @Override
+  protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
+    if (alias == this.posBigTable) {
+      // big-table keys were pre-evaluated for the whole batch; index by batchIndex
+      return keyEvaluator.evaluate(keyValues[batchIndex]);
+    } else {
+      return super.smbJoinComputeKeys(row, alias);
+    }
+  }  
+  
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+    
+    outputBatch = vrbCtx.createVectorizedRowBatch();
+    
+    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
+    
+    outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
+    
+    // This key evaluator translates from the vectorized VectorHashKeyWrapper format
+    // into the row-mode key format (a List of standard writable objects)
+    keyEvaluator = new SMBJoinKeyEvaluator() {
+      private List<Object> key;
+
+      public SMBJoinKeyEvaluator init() {
+        key = new ArrayList<Object>();
+        for(int i = 0; i < keyExpressions.length; ++i) {
+          key.add(null);
+        }
+        return this;
+      }
+
+      @Override
+      public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
+        for(int i = 0; i < keyExpressions.length; ++i) {
+          key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
+        }
+        return key;
+      }
+    }.init();
+    
+    Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
+    List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);    
+    
+    // We're hijacking the big table evaluators and replacing them with our own custom ones
+    // which are going to return values from the input batch vector expressions
+    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
+    
+    VectorExpressionWriterFactory.processVectorExpressions(
+        bigTableExpressions,
+        new VectorExpressionWriterFactory.ListOIDClosure() {
+
+          @Override
+          public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
+            valueWriters = writers;
+            joinValuesObjectInspectors[posBigTable] = oids;
+          }
+        });    
+
+    for(int i=0; i<bigTableExpressions.size(); ++i) {
+      ExprNodeDesc desc = bigTableExpressions.get(i);
+      VectorExpression vectorExpr = bigTableValueExpressions[i];
+
+      // This is a vectorized aware evaluator
+      ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc) {
+        int columnIndex;
+        int writerIndex;
+
+        public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
+          this.columnIndex = columnIndex;
+          this.writerIndex = writerIndex;
+          return this;
+        }
+
+        @Override
+        public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
+          throw new HiveException("should never reach here");
+        }
+
+        @Override
+        protected Object _evaluate(Object row, int version) throws HiveException {
+          VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+          int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
+          return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
+        }
+      }.initVectorExpr(vectorExpr.getOutputColumn(), i);
+      vectorNodeEvaluators.add(eval);
+    }
+    // Now replace the old evaluators with our own
+    joinValues[posBigTable] = vectorNodeEvaluators;
+    
+  }
+  
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    byte alias = (byte) tag;
+    
+    if (alias != this.posBigTable) {
+      super.processOp(row, tag);
+    } else {
+  
+      VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+  
+      if (null != bigTableFilterExpressions) {
+        for(VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(inBatch);
+        }
+      }
+  
+      if (null != bigTableValueExpressions) {
+        for(VectorExpression ve : bigTableValueExpressions) {
+          ve.evaluate(inBatch);
+        }
+      }
+  
+      keyWrapperBatch.evaluateBatch(inBatch);
+      keyValues = keyWrapperBatch.getVectorHashKeyWrappers();
+  
+      // This implementation of vectorized JOIN is delegating all the work
+      // to the row-mode implementation by hijacking the big table node evaluators
+      // and calling the row-mode join processOp for each row in the input batch.
+      // Since the JOIN operator is not fully vectorized at the moment
+      // (due to the use of row-mode small tables), this is a reasonable trade-off.
+      //
+      for (batchIndex = 0; batchIndex < inBatch.size; ++batchIndex) {
+        super.processOp(row, tag);
+      }
+  
+      // Set these two to invalid values so any attempt to use them
+      // outside the inner loop results in NPE/OutOfBounds errors
+      batchIndex = -1;
+      keyValues = null;
+    }
+  }
+  
+  @Override
+  public void closeOp(boolean aborted) throws HiveException {
+    super.closeOp(aborted);
+    if (!aborted && 0 < outputBatch.size) {
+      flushOutput();
+    }
+  }
+  
+  @Override
+  protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
+    Object[] values = (Object[]) row;
+    VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
+    if (null == vcas) {
+      Map<String, Map<String, Integer>> allColumnMaps = Utilities.
+          getMapRedWork(hconf).getMapWork().getScratchColumnMap();
+      Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
+      vcas = VectorColumnAssignFactory.buildAssigners(
+          outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
+      outputVectorAssigners.put(outputOI, vcas);
+    }
+    for (int i = 0; i < values.length; ++i) {
+      vcas[i].assignObjectValue(values[i], outputBatch.size);
+    }
+    ++outputBatch.size;
+    if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
+      flushOutput();
+    }
+  }
+  
+  private void flushOutput() throws HiveException {
+    forward(outputBatch, null);
+    outputBatch.reset();
+  }  
+
+  @Override
+  public VectorizationContext getOuputVectorizationContext() {
+    return vOutContext;
+  }
+}

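The heart of the operator above is evaluator hijacking: the big table's row-mode ExprNodeEvaluators are replaced with batch-aware ones that read from the current VectorizedRowBatch at the position of a shared batchIndex cursor, so the inherited row-mode join can be driven once per batch row. Below is a minimal, self-contained sketch of that mechanism; every name in it is a hypothetical stand-in, not a Hive class.

    import java.util.ArrayList;
    import java.util.List;

    public class EvaluatorHijackSketch {
      // stand-in for VectorizedRowBatch: columnar values plus a size
      static class Batch {
        int size;
        Object[][] cols;                                // cols[column][row]
      }

      interface RowEvaluator { Object evaluate(Object row); }

      static int batchIndex;                            // cursor into the current batch

      // a "vector-aware" evaluator: handed the whole batch as the row object,
      // it reads the value for the row the batchIndex cursor points at
      static RowEvaluator columnEvaluator(final int column) {
        return new RowEvaluator() {
          public Object evaluate(Object row) {
            Batch b = (Batch) row;
            return b.cols[column][batchIndex];
          }
        };
      }

      public static void main(String[] args) {
        Batch b = new Batch();
        b.size = 2;
        b.cols = new Object[][] { { 1, 2 }, { "a", "b" } };

        List<RowEvaluator> joinValues = new ArrayList<RowEvaluator>();
        joinValues.add(columnEvaluator(0));
        joinValues.add(columnEvaluator(1));

        // drive the "row-mode" logic once per row, passing the batch as the row
        for (batchIndex = 0; batchIndex < b.size; ++batchIndex) {
          for (RowEvaluator e : joinValues) {
            System.out.print(e.evaluate(b) + " ");
          }
          System.out.println();
        }
        batchIndex = -1;                                // invalidate outside the loop
      }
    }
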
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Fri Jan 17 02:08:46 2014
@@ -299,11 +299,7 @@ public class OrcInputFormat  implements 
   }
 
   private boolean isVectorMode(Configuration conf) {
-    if (Utilities.getPlanPath(conf) != null && Utilities
-        .getMapRedWork(conf).getMapWork().getVectorMode()) {
-      return true;
-    }
-    return false;
+    return Utilities.isVectorMode(conf);
   }
 
   /**

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan 17 02:08:46 2014
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.Ma
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -78,6 +79,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.TezWork;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -555,6 +557,8 @@ public class Vectorizer implements Physi
       case MAPJOIN:
         if (op instanceof MapJoinOperator) {
           ret = validateMapJoinOperator((MapJoinOperator) op);
+        } else if (op instanceof SMBMapJoinOperator) {
+          ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
         }
         break;
       case GROUPBY:
@@ -583,6 +587,12 @@ public class Vectorizer implements Physi
     return ret;
   }
 
+  private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
+    SMBJoinDesc desc = op.getConf();
+    // Validation is the same as for map join, since the 'small' tables are not vectorized
+    return validateMapJoinDesc(desc);
+  }
+
   private boolean validateTableScanOperator(TableScanOperator op) {
     TableScanDesc desc = op.getConf();
     return !desc.isGatherStats();
@@ -590,6 +600,10 @@ public class Vectorizer implements Physi
 
   private boolean validateMapJoinOperator(MapJoinOperator op) {
     MapJoinDesc desc = op.getConf();
+    return validateMapJoinDesc(desc);
+  }
+  
+  private boolean validateMapJoinDesc(MapJoinDesc desc) {
     byte posBigTable = (byte) desc.getPosBigTable();
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);

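The validation refactoring above works because SMBJoinDesc extends MapJoinDesc in Hive, so one validator over the shared descriptor supertype covers both operators; the SMB 'small' tables remain row-mode and need no extra checks. A short sketch of the idea, with illustrative stand-in classes and a placeholder check:

    public class ValidationSketch {
      // illustrative stand-ins: in Hive, SMBJoinDesc extends MapJoinDesc
      static class MapJoinDescSketch { int posBigTable; }
      static class SMBJoinDescSketch extends MapJoinDescSketch {}

      // shared checks over the common descriptor supertype
      static boolean validateMapJoinDesc(MapJoinDescSketch desc) {
        return desc.posBigTable >= 0;    // placeholder for the real key/filter checks
      }

      static boolean validateSMBMapJoin(SMBJoinDescSketch desc) {
        // same checks as map join: the SMB small tables stay row-mode
        return validateMapJoinDesc(desc);
      }

      public static void main(String[] args) {
        System.out.println(validateSMBMapJoin(new SMBJoinDescSketch()));  // true
      }
    }
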
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Fri Jan 17 02:08:46 2014
@@ -146,39 +146,63 @@ public class TestVectorizer {
     Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER));
     Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION));
   }
+ 
+  /**
+   * prepareAbstractMapJoin populates a join descriptor; used as a helper by the SMB and map join tests.
+   */
+  private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> mop, MapJoinDesc mjdesc) {
+    mjdesc.setPosBigTable(0);
+    List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>();
+    expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
+    Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
+    keyMap.put((byte) 0, expr);
+    mjdesc.setKeys(keyMap);
+    mjdesc.setExprs(keyMap);
 
+    // Set the filter expression
+    GenericUDFOPEqual udf = new GenericUDFOPEqual();
+    ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc();
+    equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    equalExprDesc.setGenericUDF(udf);
+    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+    children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
+    children1.add(new ExprNodeColumnDesc(Integer.class, "col3", "T2", false));
+    equalExprDesc.setChildren(children1);
+    List<ExprNodeDesc> filterExpr = new ArrayList<ExprNodeDesc>();
+    filterExpr.add(equalExprDesc);
+    Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
+    filterMap.put((byte) 0, filterExpr);
+    mjdesc.setFilters(filterMap);
+  }
+
+  /**
+   * testValidateMapJoinOperator validates that the map join operator can be vectorized.
+   */
   @Test
   public void testValidateMapJoinOperator() {
     MapJoinOperator mop = new MapJoinOperator();
     MapJoinDesc mjdesc = new MapJoinDesc();
-    mjdesc.setPosBigTable(0);
-    List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>();
-    expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
-    Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
-    keyMap.put((byte)0, expr);
-    mjdesc.setKeys(keyMap);
-    mjdesc.setExprs(keyMap);
-
-    //Set filter expression
-    GenericUDFOPEqual udf = new GenericUDFOPEqual();
-    ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc();
-    equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
-    equalExprDesc.setGenericUDF(udf);
-    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
-    children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
-    children1.add(new ExprNodeColumnDesc(Integer.class, "col3", "T2", false));
-    equalExprDesc.setChildren(children1);
-    List<ExprNodeDesc> filterExpr = new ArrayList<ExprNodeDesc>();
-    filterExpr.add(equalExprDesc);
-    Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
-    filterMap.put((byte) 0, expr);
-    mjdesc.setFilters(filterMap);
+    
+    prepareAbstractMapJoin(mop, mjdesc);
     mop.setConf(mjdesc);
-
+ 
     Vectorizer vectorizer = new Vectorizer();
-
     Assert.assertTrue(vectorizer.validateOperator(mop));
-    SMBMapJoinOperator smbmop = new SMBMapJoinOperator(mop);
-    Assert.assertFalse(vectorizer.validateOperator(smbmop));
+  }
+
+  
+  /**
+   * testValidateSMBJoinOperator validates that the SMB join operator can be vectorized.
+   */
+  @Test
+  public void testValidateSMBJoinOperator() {
+    SMBMapJoinOperator mop = new SMBMapJoinOperator();
+    SMBJoinDesc mjdesc = new SMBJoinDesc();
+
+    prepareAbstractMapJoin(mop, mjdesc);
+    mop.setConf(mjdesc);
+
+    Vectorizer vectorizer = new Vectorizer();
+    Assert.assertTrue(vectorizer.validateOperator(mop));
   }
 }

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q?rev=1558987&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q Fri Jan 17 02:08:46 2014
@@ -0,0 +1,46 @@
+create table vsmb_bucket_1(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS ORC;
+create table vsmb_bucket_2(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS ORC;
+
+create table vsmb_bucket_RC(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS RCFILE;
+
+create table vsmb_bucket_TXT(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS TEXTFILE;
+  
+insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2;
+insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2;
+insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2;
+insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2;  
+
+set hive.vectorized.execution.enabled=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.sortmerge.join.noconditionaltask = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+explain
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
+
+-- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
+-- explain
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out?rev=1558987&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out Fri Jan 17 02:08:46 2014
@@ -0,0 +1,370 @@
+PREHOOK: query: create table vsmb_bucket_1(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_1(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_1
+PREHOOK: query: create table vsmb_bucket_2(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_2(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_2
+PREHOOK: query: create table vsmb_bucket_RC(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_RC(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_RC
+PREHOOK: query: create table vsmb_bucket_TXT(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_TXT(key int, value string) 
+  CLUSTERED BY (key) 
+  SORTED BY (key) INTO 1 BUCKETS 
+  STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_TXT
+PREHOOK: query: insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: query: insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: query: insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_rc
+POSTHOOK: query: insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_rc
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_txt
+POSTHOOK: query: insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_txt
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: explain
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Sorted Merge Bucket Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Position of Big Table: 1
+              Vectorized execution: true
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col4
+                      type: int
+                      expr: _col5
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Vectorized execution: true
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Vectorized execution: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+PREHOOK: query: explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_RC) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Sorted Merge Bucket Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Position of Big Table: 0
+              Vectorized execution: true
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col4
+                      type: int
+                      expr: _col5
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Vectorized execution: true
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Vectorized execution: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_rc
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_rc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
+-- explain
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
+-- explain
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_TXT) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Sorted Merge Bucket Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Position of Big Table: 0
+              Vectorized execution: true
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col4
+                      type: int
+                      expr: _col5
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Vectorized execution: true
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Vectorized execution: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_txt
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_txt
+#### A masked pattern was here ####
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p
+528534767	cvLH6Eat2yFsyy7p	528534767	cvLH6Eat2yFsyy7p


