hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gunt...@apache.org
Subject [7/8] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
Date Fri, 15 May 2015 21:39:03 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
index 37ccf22..f971727 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
@@ -24,7 +24,9 @@ import java.util.Arrays;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -123,13 +125,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
 
       batchCounter++;
 
-      // Do the per-batch setup for an outer join.
-
-      outerPerBatchSetup(batch);
-
-      // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-      // apply the filter before hash table matching.
-
       final int inputLogicalSize = batch.size;
 
       if (inputLogicalSize == 0) {
@@ -139,6 +134,44 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         return;
       }
 
+      // Do the per-batch setup for an outer join.
+
+      outerPerBatchSetup(batch);
+
+      // For outer join, remember our input rows before ON expression filtering or before
+      // hash table matching so we can generate results for all rows (matching and non matching)
+      // later.
+      boolean inputSelectedInUse = batch.selectedInUse;
+      if (inputSelectedInUse) {
+        // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+        //   throw new HiveException("batch.selected is not in sort order and unique");
+        // }
+        System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+      }
+
+      // Filtering for outer join just removes rows available for hash table matching.
+      boolean someRowsFilteredOut =  false;
+      if (bigTableFilterExpressions.length > 0) {
+        // Input rows are saved above, so the filters may shrink the batch in place.
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(batch);
+        }
+        someRowsFilteredOut = (batch.size != inputLogicalSize);
+        if (LOG.isDebugEnabled()) {
+          if (batch.selectedInUse) {
+            if (inputSelectedInUse) {
+              LOG.debug(CLASS_NAME +
+                  " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+                  " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            } else {
+              LOG.debug(CLASS_NAME +
+                " inputLogicalSize " + inputLogicalSize +
+                " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            }
+          }
+        }
+      }
+
       // Perform any key expressions.  Results will go into scratch columns.
       if (bigTableKeyExpressions != null) {
         for (VectorExpression ve : bigTableKeyExpressions) {
@@ -146,9 +179,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column Long specific declarations.
        */
@@ -178,12 +208,16 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
          */
 
         JoinUtil.JoinResult joinResult;
-        if (!joinColVector.noNulls && joinColVector.isNull[0]) {
-          // Null key is no match for whole batch.
+        if (batch.size == 0) {
+          // Whole repeated key batch was filtered out.
+          joinResult = JoinUtil.JoinResult.NOMATCH;
+        } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+          // Any (repeated) null key column is no match for whole batch.
           joinResult = JoinUtil.JoinResult.NOMATCH;
         } else {
           // Handle *repeated* join key, if found.
           long key = vector[0];
+          // LOG.debug(CLASS_NAME + " repeated key " + key);
           if (useMinMax && (key < min || key > max)) {
             // Out of range for whole batch.
             joinResult = JoinUtil.JoinResult.NOMATCH;
@@ -199,7 +233,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+        finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+            inputSelectedInUse, inputLogicalSize);
       } else {
 
         /*
@@ -213,14 +248,13 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         int selected[] = batch.selected;
         boolean selectedInUse = batch.selectedInUse;
 
-        // For outer join we must apply the filter after match and cause some matches to become
-        // non-matches, we do not track non-matches here.  Instead we remember all non spilled rows
-        // and compute non matches later in finishOuter.
         int hashMapResultCount = 0;
-        int matchCount = 0;
-        int nonSpillCount = 0;
+        int allMatchCount = 0;
+        int equalKeySeriesCount = 0;
         int spillCount = 0;
 
+        boolean atLeastOneNonMatch = someRowsFilteredOut;
+
         /*
          * Single-Column Long specific variables.
          */
@@ -232,9 +266,11 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
 
         // Logical loop over the rows in the batch since the batch may have selected in use.
-        for (int logical = 0; logical < inputLogicalSize; logical++) {
+        for (int logical = 0; logical < batch.size; logical++) {
           int batchIndex = (selectedInUse ? selected[logical] : logical);
 
+          // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
           /*
            * Single-Column Long outer null detection.
            */
@@ -250,8 +286,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
             //    Let a current SPILL equal key series keep going, or
             //    Let a current NOMATCH keep not matching.
 
-            // Remember non-matches for Outer Join.
-            nonSpills[nonSpillCount++] = batchIndex;
+            atLeastOneNonMatch = true;
+
             // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
           } else {
 
@@ -269,9 +305,12 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
               // New key.
 
               if (haveSaveKey) {
-                // Move on with our count(s).
+                // Move on with our counts.
                 switch (saveJoinResult) {
                 case MATCH:
+                  hashMapResultCount++;
+                  equalKeySeriesCount++;
+                  break;
                 case SPILL:
                   hashMapResultCount++;
                   break;
@@ -300,41 +339,70 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
               } else {
                 saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
               }
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
-            } else {
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
-            }
 
-            /*
-             * Common outer join result processing.
-             */
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name());
 
-            switch (saveJoinResult) {
-            case MATCH:
-              matchs[matchCount] = batchIndex;
-              matchHashMapResultIndices[matchCount] = hashMapResultCount;
-              matchCount++;
-              nonSpills[nonSpillCount++] = batchIndex;
-              break;
-
-            case SPILL:
-              spills[spillCount] = batchIndex;
-              spillHashMapResultIndices[spillCount] = hashMapResultCount;
-              spillCount++;
-              break;
-
-            case NOMATCH:
-              nonSpills[nonSpillCount++] = batchIndex;
-              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
-              break;
+              /*
+               * Common outer join result processing.
+               */
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                atLeastOneNonMatch = true;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+                break;
+              }
+            } else {
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+              // Series of equal keys.
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+                break;
+              }
             }
+            // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+            //   throw new HiveException("allMatchs is not in sort order and unique");
+            // }
           }
         }
 
         if (haveSaveKey) {
-          // Account for last equal key sequence.
+          // Update our counts for the last key.
           switch (saveJoinResult) {
           case MATCH:
+            hashMapResultCount++;
+            equalKeySeriesCount++;
+            break;
           case SPILL:
             hashMapResultCount++;
             break;
@@ -345,27 +413,26 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
 
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter +
-              " matchs " + intArrayToRangesString(matchs, matchCount) +
-              " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
-              " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+              " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+              " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+              " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+              " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+              " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+              " atLeastOneNonMatch " + atLeastOneNonMatch +
+              " inputSelectedInUse " + inputSelectedInUse +
+              " inputLogicalSize " + inputLogicalSize +
               " spills " + intArrayToRangesString(spills, spillCount) +
               " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
         // We will generate results for all matching and non-matching rows.
-        // Note that scratch1 is undefined at this point -- it's preallocated storage.
-        numSel = finishOuter(batch,
-                    matchs, matchHashMapResultIndices, matchCount,
-                    nonSpills, nonSpillCount,
-                    spills, spillHashMapResultIndices, spillCount,
-                    hashMapResults, hashMapResultCount,
-                    scratch1);
+        finishOuter(batch,
+            allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+            inputSelectedInUse, inputLogicalSize,
+            spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
index 23a29f7..bea032a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
@@ -128,13 +128,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
 
       batchCounter++;
 
-      // Do the per-batch setup for an outer join.
-
-      outerPerBatchSetup(batch);
-
-      // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-      // apply the filter before hash table matching.
-
       final int inputLogicalSize = batch.size;
 
       if (inputLogicalSize == 0) {
@@ -144,6 +137,44 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         return;
       }
 
+      // Do the per-batch setup for an outer join.
+
+      outerPerBatchSetup(batch);
+
+      // For outer join, remember our input rows before ON expression filtering or before
+      // hash table matching so we can generate results for all rows (matching and non matching)
+      // later.
+      boolean inputSelectedInUse = batch.selectedInUse;
+      if (inputSelectedInUse) {
+        // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+        //   throw new HiveException("batch.selected is not in sort order and unique");
+        // }
+        System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+      }
+
+      // Filtering for outer join just removes rows available for hash table matching.
+      boolean someRowsFilteredOut =  false;
+      if (bigTableFilterExpressions.length > 0) {
+        // Input rows are saved above, so the filters may shrink the batch in place.
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(batch);
+        }
+        someRowsFilteredOut = (batch.size != inputLogicalSize);
+        if (LOG.isDebugEnabled()) {
+          if (batch.selectedInUse) {
+            if (inputSelectedInUse) {
+              LOG.debug(CLASS_NAME +
+                  " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+                  " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            } else {
+              LOG.debug(CLASS_NAME +
+                " inputLogicalSize " + inputLogicalSize +
+                " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            }
+          }
+        }
+      }
+
       // Perform any key expressions.  Results will go into scratch columns.
       if (bigTableKeyExpressions != null) {
         for (VectorExpression ve : bigTableKeyExpressions) {
@@ -151,9 +182,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Multi-Key specific declarations.
        */
@@ -199,8 +227,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
          */
 
         JoinUtil.JoinResult joinResult;
-        if (someKeyInputColumnIsNull) {
-          // Any null key column is no match for whole batch.
+        if (batch.size == 0) {
+          // Whole repeated key batch was filtered out.
+          joinResult = JoinUtil.JoinResult.NOMATCH;
+        } else if (someKeyInputColumnIsNull) {
+          // Any (repeated) null key column is no match for whole batch.
           joinResult = JoinUtil.JoinResult.NOMATCH;
         } else {
 
@@ -219,7 +250,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+        finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+            inputSelectedInUse, inputLogicalSize);
       } else {
 
         /*
@@ -233,14 +265,13 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         int selected[] = batch.selected;
         boolean selectedInUse = batch.selectedInUse;
 
-        // For outer join we must apply the filter after match and cause some matches to become
-        // non-matches, we do not track non-matches here.  Instead we remember all non spilled rows
-        // and compute non matches later in finishOuter.
         int hashMapResultCount = 0;
-        int matchCount = 0;
-        int nonSpillCount = 0;
+        int allMatchCount = 0;
+        int equalKeySeriesCount = 0;
         int spillCount = 0;
 
+        boolean atLeastOneNonMatch = someRowsFilteredOut;
+
         /*
          * Multi-Key specific variables.
          */
@@ -252,9 +283,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
 
         // Logical loop over the rows in the batch since the batch may have selected in use.
-        for (int logical = 0; logical < inputLogicalSize; logical++) {
+        for (int logical = 0; logical < batch.size; logical++) {
           int batchIndex = (selectedInUse ? selected[logical] : logical);
 
+          // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
           /*
            * Multi-Key outer null detection.
            */
@@ -272,8 +305,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
             //    Let a current SPILL equal key series keep going, or
             //    Let a current NOMATCH keep not matching.
 
-            // Remember non-matches for Outer Join.
-            nonSpills[nonSpillCount++] = batchIndex;
+            atLeastOneNonMatch = true;
+
             // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
           } else {
 
@@ -292,9 +325,12 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
               // New key.
 
               if (haveSaveKey) {
-                // Move on with our count(s).
+                // Move on with our counts.
                 switch (saveJoinResult) {
                 case MATCH:
+                  hashMapResultCount++;
+                  equalKeySeriesCount++;
+                  break;
                 case SPILL:
                   hashMapResultCount++;
                   break;
@@ -322,41 +358,68 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
               byte[] keyBytes = saveKeyOutput.getData();
               int keyLength = saveKeyOutput.getLength();
               saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
-            } else {
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
-            }
 
-            /*
-             * Common outer join result processing.
-             */
+              /*
+               * Common outer join result processing.
+               */
 
-            switch (saveJoinResult) {
-            case MATCH:
-              matchs[matchCount] = batchIndex;
-              matchHashMapResultIndices[matchCount] = hashMapResultCount;
-              matchCount++;
-              nonSpills[nonSpillCount++] = batchIndex;
-              break;
-
-            case SPILL:
-              spills[spillCount] = batchIndex;
-              spillHashMapResultIndices[spillCount] = hashMapResultCount;
-              spillCount++;
-              break;
-
-            case NOMATCH:
-              nonSpills[nonSpillCount++] = batchIndex;
-              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
-              break;
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                atLeastOneNonMatch = true;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+                break;
+              }
+            } else {
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+              // Series of equal keys.
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+                break;
+              }
             }
+            // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+            //   throw new HiveException("allMatchs is not in sort order and unique");
+            // }
           }
         }
 
         if (haveSaveKey) {
-          // Account for last equal key sequence.
+          // Update our counts for the last key.
           switch (saveJoinResult) {
           case MATCH:
+            hashMapResultCount++;
+            equalKeySeriesCount++;
+            break;
           case SPILL:
             hashMapResultCount++;
             break;
@@ -367,27 +430,26 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
 
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter +
-              " matchs " + intArrayToRangesString(matchs, matchCount) +
-              " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
-              " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+              " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+              " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+              " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+              " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+              " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+              " atLeastOneNonMatch " + atLeastOneNonMatch +
+              " inputSelectedInUse " + inputSelectedInUse +
+              " inputLogicalSize " + inputLogicalSize +
               " spills " + intArrayToRangesString(spills, spillCount) +
               " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
         // We will generate results for all matching and non-matching rows.
-        // Note that scratch1 is undefined at this point -- it's preallocated storage.
-        numSel = finishOuter(batch,
-                    matchs, matchHashMapResultIndices, matchCount,
-                    nonSpills, nonSpillCount,
-                    spills, spillHashMapResultIndices, spillCount,
-                    hashMapResults, hashMapResultCount,
-                    scratch1);
+        finishOuter(batch,
+            allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+            inputSelectedInUse, inputLogicalSize,
+            spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index f0af3f6..49efe1a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -115,13 +115,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
 
       batchCounter++;
 
-      // Do the per-batch setup for an outer join.
-
-      outerPerBatchSetup(batch);
-
-      // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-      // apply the filter before hash table matching.
-
       final int inputLogicalSize = batch.size;
 
       if (inputLogicalSize == 0) {
@@ -131,6 +124,44 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         return;
       }
 
+      // Do the per-batch setup for an outer join.
+
+      outerPerBatchSetup(batch);
+
+      // For outer join, remember our input rows before ON expression filtering or before
+      // hash table matching so we can generate results for all rows (matching and non matching)
+      // later.
+      boolean inputSelectedInUse = batch.selectedInUse;
+      if (inputSelectedInUse) {
+        // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+        //   throw new HiveException("batch.selected is not in sort order and unique");
+        // }
+        System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+      }
+
+      // Filtering for outer join just removes rows available for hash table matching.
+      boolean someRowsFilteredOut =  false;
+      if (bigTableFilterExpressions.length > 0) {
+        // Input rows are saved above, so the filters may shrink the batch in place.
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(batch);
+        }
+        someRowsFilteredOut = (batch.size != inputLogicalSize);
+        if (LOG.isDebugEnabled()) {
+          if (batch.selectedInUse) {
+            if (inputSelectedInUse) {
+              LOG.debug(CLASS_NAME +
+                  " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+                  " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            } else {
+              LOG.debug(CLASS_NAME +
+                " inputLogicalSize " + inputLogicalSize +
+                " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            }
+          }
+        }
+      }
+
       // Perform any key expressions.  Results will go into scratch columns.
       if (bigTableKeyExpressions != null) {
         for (VectorExpression ve : bigTableKeyExpressions) {
@@ -138,9 +169,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column String specific declarations.
        */
@@ -172,8 +200,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
          */
 
         JoinUtil.JoinResult joinResult;
-        if (!joinColVector.noNulls && joinColVector.isNull[0]) {
-          // Null key is no match for whole batch.
+        if (batch.size == 0) {
+          // Whole repeated key batch was filtered out.
+          joinResult = JoinUtil.JoinResult.NOMATCH;
+        } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+          // Any (repeated) null key column is no match for whole batch.
           joinResult = JoinUtil.JoinResult.NOMATCH;
         } else {
           // Handle *repeated* join key, if found.
@@ -190,7 +221,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+        finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+            inputSelectedInUse, inputLogicalSize);
       } else {
 
         /*
@@ -204,14 +236,13 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         int selected[] = batch.selected;
         boolean selectedInUse = batch.selectedInUse;
 
-        // For outer join we must apply the filter after match and cause some matches to become
-        // non-matches, we do not track non-matches here.  Instead we remember all non spilled rows
-        // and compute non matches later in finishOuter.
         int hashMapResultCount = 0;
-        int matchCount = 0;
-        int nonSpillCount = 0;
+        int allMatchCount = 0;
+        int equalKeySeriesCount = 0;
         int spillCount = 0;
 
+        boolean atLeastOneNonMatch = someRowsFilteredOut;
+
         /*
          * Single-Column String specific variables.
          */
@@ -223,9 +254,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
 
         // Logical loop over the rows in the batch since the batch may have selected in use.
-        for (int logical = 0; logical < inputLogicalSize; logical++) {
+        for (int logical = 0; logical < batch.size; logical++) {
           int batchIndex = (selectedInUse ? selected[logical] : logical);
 
+          // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
           /*
            * Single-Column String outer null detection.
            */
@@ -241,8 +274,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
             //    Let a current SPILL equal key series keep going, or
             //    Let a current NOMATCH keep not matching.
 
-            // Remember non-matches for Outer Join.
-            nonSpills[nonSpillCount++] = batchIndex;
+            atLeastOneNonMatch = true;
+
             // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
           } else {
 
@@ -262,9 +295,12 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
               // New key.
 
               if (haveSaveKey) {
-                // Move on with our count(s).
+                // Move on with our counts.
                 switch (saveJoinResult) {
                 case MATCH:
+                  hashMapResultCount++;
+                  equalKeySeriesCount++;
+                  break;
                 case SPILL:
                   hashMapResultCount++;
                   break;
@@ -290,43 +326,69 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
               byte[] keyBytes = vector[batchIndex];
               int keyStart = start[batchIndex];
               int keyLength = length[batchIndex];
-
               saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
-            } else {
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
-            }
 
-            /*
-             * Common outer join result processing.
-             */
+              /*
+               * Common outer join result processing.
+               */
 
-            switch (saveJoinResult) {
-            case MATCH:
-              matchs[matchCount] = batchIndex;
-              matchHashMapResultIndices[matchCount] = hashMapResultCount;
-              matchCount++;
-              nonSpills[nonSpillCount++] = batchIndex;
-              break;
-
-            case SPILL:
-              spills[spillCount] = batchIndex;
-              spillHashMapResultIndices[spillCount] = hashMapResultCount;
-              spillCount++;
-              break;
-
-            case NOMATCH:
-              nonSpills[nonSpillCount++] = batchIndex;
-              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
-              break;
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                atLeastOneNonMatch = true;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+                break;
+              }
+            } else {
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+              // Series of equal keys.
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+                break;
+              }
             }
+            // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+            //   throw new HiveException("allMatchs is not in sort order and unique");
+            // }
           }
         }
 
         if (haveSaveKey) {
-          // Account for last equal key sequence.
+          // Update our counts for the last key.
           switch (saveJoinResult) {
           case MATCH:
+            hashMapResultCount++;
+            equalKeySeriesCount++;
+            break;
           case SPILL:
             hashMapResultCount++;
             break;
@@ -337,27 +399,26 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
 
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter +
-              " matchs " + intArrayToRangesString(matchs, matchCount) +
-              " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
-              " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+              " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+              " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+              " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+              " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+              " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+              " atLeastOneNonMatch " + atLeastOneNonMatch +
+              " inputSelectedInUse " + inputSelectedInUse +
+              " inputLogicalSize " + inputLogicalSize +
               " spills " + intArrayToRangesString(spills, spillCount) +
               " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
         // We will generate results for all matching and non-matching rows.
-        // Note that scratch1 is undefined at this point -- it's preallocated storage.
-        numSel = finishOuter(batch,
-                    matchs, matchHashMapResultIndices, matchCount,
-                    nonSpills, nonSpillCount,
-                    spills, spillHashMapResultIndices, spillCount,
-                    hashMapResults, hashMapResultCount,
-                    scratch1);
+        finishOuter(batch,
+            allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+            inputSelectedInUse, inputLogicalSize,
+            spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
index 1c91be6..32b60d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
@@ -91,7 +91,7 @@ public class VectorMapJoinRowBytesContainer {
     }
 
     tmpFile = File.createTempFile("BytesContainer", ".tmp", parentFile);
-    LOG.info("BytesContainer created temp file " + tmpFile.getAbsolutePath());
+    LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath());
     tmpFile.deleteOnExit();
 
     fileOutputStream = new FileOutputStream(tmpFile);

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index f9550c9..6afaec3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -54,13 +54,13 @@ public abstract class VectorMapJoinFastBytesHashMap
       slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
-      // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       keysAssigned++;
     } else {
       // Add another value.
-      // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength);
-      // LOG.info("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
     }
   }
 
@@ -77,7 +77,7 @@ public abstract class VectorMapJoinFastBytesHashMap
     if (valueRefWord == -1) {
       joinResult = JoinUtil.JoinResult.NOMATCH;
     } else {
-      // LOG.info("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
+      // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
 
       optimizedHashMapResult.set(valueStore, valueRefWord);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 9dcaf8f..dceb99c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -49,11 +49,11 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
       slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = 1;    // Count.
-      // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       keysAssigned++;
     } else {
       // Add another value.
-      // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       slotTriples[tripleIndex + 2]++;
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index b6e6321..91d7fd6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -79,13 +79,13 @@ public abstract class VectorMapJoinFastBytesHashTable
     while (true) {
       int tripleIndex = 3 * slot;
       if (slotTriples[tripleIndex] == 0) {
-        // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
+        // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
         isNewKey = true;;
         break;
       }
       if (hashCode == slotTriples[tripleIndex + 1] &&
           keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) {
-        // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
+        // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
         isNewKey = false;
         break;
       }
@@ -155,7 +155,7 @@ public abstract class VectorMapJoinFastBytesHashTable
         }
 
         // Use old value reference word.
-        // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
 
         newSlotTriples[newTripleIndex] = keyRef;
         newSlotTriples[newTripleIndex + 1] = hashCode;
@@ -170,7 +170,7 @@ public abstract class VectorMapJoinFastBytesHashTable
     largestNumberOfSteps = newLargestNumberOfSteps;
     resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
     metricExpands++;
-    // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+    // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
   }
 
   protected long findReadSlot(byte[] keyBytes, int keyStart, int keyLength, long hashCode) {
@@ -181,7 +181,7 @@ public abstract class VectorMapJoinFastBytesHashTable
     int i = 0;
     while (true) {
       int tripleIndex = slot * 3;
-      // LOG.info("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       if (slotTriples[tripleIndex] != 0 && hashCode == slotTriples[tripleIndex + 1]) {
         // Finally, verify the key bytes match.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
index f2f42ee..9d95d05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
@@ -112,7 +112,7 @@ public class VectorMapJoinFastKeyStore {
     }
     keyRefWord |= absoluteKeyOffset;
 
-    // LOG.info("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
+    // LOG.debug("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
     return keyRefWord;
   }
 
@@ -122,7 +122,7 @@ public class VectorMapJoinFastKeyStore {
         (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift);
     boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn);
 
-    // LOG.info("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
+    // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
 
     if (isKeyLengthSmall && storedKeyLengthLength != keyLength) {
       return false;
@@ -135,7 +135,7 @@ public class VectorMapJoinFastKeyStore {
       // Read big value length we wrote with the value.
       storedKeyLengthLength = writeBuffers.readVInt(readPos);
       if (storedKeyLengthLength != keyLength) {
-        // LOG.info("VectorMapJoinFastKeyStore equalKey no match big length");
+        // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length");
         return false;
       }
     }
@@ -148,11 +148,11 @@ public class VectorMapJoinFastKeyStore {
 
     for (int i = 0; i < keyLength; i++) {
       if (currentBytes[currentStart + i] != keyBytes[keyStart + i]) {
-        // LOG.info("VectorMapJoinFastKeyStore equalKey no match on bytes");
+        // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes");
         return false;
       }
     }
-    // LOG.info("VectorMapJoinFastKeyStore equalKey match on bytes");
+    // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes");
     return true;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index d6ad028..4725f55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -68,7 +68,7 @@ public class VectorMapJoinFastLongHashMap
     optimizedHashMapResult.forget();
 
     long hashCode = VectorMapJoinFastLongHashUtil.hashKey(key);
-    // LOG.info("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
+    // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
     long valueRef = findReadSlot(key, hashCode);
     JoinUtil.JoinResult joinResult;
     if (valueRef == -1) {

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 2137fb7..17855eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -121,13 +121,13 @@ public abstract class VectorMapJoinFastLongHashTable
       int pairIndex = 2 * slot;
       long valueRef = slotPairs[pairIndex];
       if (valueRef == 0) {
-        // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
         isNewKey = true;
         break;
       }
       long tableKey = slotPairs[pairIndex + 1];
       if (key == tableKey) {
-        // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
         isNewKey = false;
         break;
       }
@@ -145,7 +145,7 @@ public abstract class VectorMapJoinFastLongHashTable
       // debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot);
     }
 
-    // LOG.info("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
+    // LOG.debug("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
 
     assignSlot(slot, key, isNewKey, currentValue);
 
@@ -206,7 +206,7 @@ public abstract class VectorMapJoinFastLongHashTable
         }
 
         // Use old value reference word.
-        // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
 
         newSlotPairs[newPairIndex] = valueRef;
         newSlotPairs[newPairIndex + 1] = tableKey;
@@ -220,7 +220,7 @@ public abstract class VectorMapJoinFastLongHashTable
     largestNumberOfSteps = newLargestNumberOfSteps;
     resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
     metricExpands++;
-    // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+    // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
   }
 
   protected long findReadSlot(long key, long hashCode) {
@@ -235,20 +235,20 @@ public abstract class VectorMapJoinFastLongHashTable
       long valueRef = slotPairs[pairIndex];
       if (valueRef == 0) {
         // Given that we do not delete, an empty slot means no match.
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
         return -1;
       }
       long tableKey = slotPairs[pairIndex + 1];
       if (key == tableKey) {
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
         return slotPairs[pairIndex];
       }
       // Some other key (collision) - keep probing.
       probeSlot += (++i);
       if (i > largestNumberOfSteps) {
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot returning not found");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found");
         // We know we never went that far when we were inserting.
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
         return -1;
       }
       slot = (int)(probeSlot & logicalHashBucketMask);

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
index 373b5f4..4b1d6f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
@@ -81,7 +81,7 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
     int newThreshold = HashMapWrapper.calculateTableSize(
         keyCountAdj, threshold, loadFactor, keyCount);
 
-    // LOG.info("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
+    // LOG.debug("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
 
     VectorMapJoinFastHashTable = createHashTable(newThreshold);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index caa705c..6491dc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -142,7 +142,7 @@ public class VectorMapJoinFastValueStore {
     }
 
     public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
-      // LOG.info("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
+      // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
 
       this.valueStore = valueStore;
       this.valueRefWord = valueRefWord;
@@ -473,7 +473,7 @@ public class VectorMapJoinFastValueStore {
       valueRefWord |= SmallValueLength.allBitsOnBitShifted;
     }
 
-    // LOG.info("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
+    // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
 
     // The lower bits are the absolute value offset.
     valueRefWord |= newAbsoluteOffset;
@@ -499,7 +499,7 @@ public class VectorMapJoinFastValueStore {
     boolean isOldValueLast =
         ((oldValueRef & IsLastFlag.flagOnMask) != 0);
 
-    // LOG.info("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
+    // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
 
     /*
      * Write information about the old value (which becomes our next) at the beginning
@@ -546,7 +546,7 @@ public class VectorMapJoinFastValueStore {
     // The lower bits are the absolute value offset.
     newValueRef |=  newAbsoluteOffset;
 
-    // LOG.info("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
+    // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
 
     return newValueRef;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
index 60825ce..dc65eaa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
@@ -113,7 +113,7 @@ public class VectorMapJoinOptimizedLongCommon {
       }
 
       // byte[] bytes = Arrays.copyOf(currentKey.get(), currentKey.getLength());
-      // LOG.info("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+      // LOG.debug("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
 
     }
 
@@ -145,7 +145,7 @@ public class VectorMapJoinOptimizedLongCommon {
     }
 
     // byte[] bytes = Arrays.copyOf(output.getData(), output.getLength());
-    // LOG.info("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+    // LOG.debug("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
 
     serializedBytes.bytes = output.getData();
     serializedBytes.offset = 0;

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 096239e..656a5e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1069,11 +1069,21 @@ public class Vectorizer implements PhysicalPlanResolver {
   private boolean validateMapJoinDesc(MapJoinDesc desc) {
     byte posBigTable = (byte) desc.getPosBigTable();
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
+    if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
+      LOG.info("Cannot vectorize map work filter expression");
+      return false;
+    }
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
+    if (!validateExprNodeDesc(keyExprs)) {
+      LOG.info("Cannot vectorize map work key expression");
+      return false;
+    }
     List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
-    return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
-        validateExprNodeDesc(keyExprs) &&
-        validateExprNodeDesc(valueExprs);
+    if (!validateExprNodeDesc(valueExprs)) {
+      LOG.info("Cannot vectorize map work value expression");
+      return false;
+    }
+    return true;
   }
 
   private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
@@ -1089,6 +1099,7 @@ public class Vectorizer implements PhysicalPlanResolver {
     for (ExprNodeDesc desc : descList) {
       boolean ret = validateExprNodeDesc(desc);
       if (!ret) {
+        LOG.info("Cannot vectorize select expression: " + desc.toString());
         return false;
       }
     }
@@ -1110,10 +1121,12 @@ public class Vectorizer implements PhysicalPlanResolver {
     }
     boolean ret = validateExprNodeDesc(desc.getKeys());
     if (!ret) {
+      LOG.info("Cannot vectorize groupby key expression");
       return false;
     }
     ret = validateAggregationDesc(desc.getAggregators(), isReduce);
     if (!ret) {
+      LOG.info("Cannot vectorize groupby aggregate expression");
       return false;
     }
     if (isReduce) {
@@ -1248,10 +1261,13 @@ public class Vectorizer implements PhysicalPlanResolver {
   }
 
   private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) {
-    if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
+    String udfName = aggDesc.getGenericUDAFName().toLowerCase();
+    if (!supportedAggregationUdfs.contains(udfName)) {
+      LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
       return false;
     }
     if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+      LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
       return false;
     }
     // See if we can vectorize the aggregation.

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/queries/clientpositive/vector_join30.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q
new file mode 100644
index 0000000..2275804
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join30.q
@@ -0,0 +1,160 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/queries/clientpositive/vector_join_filters.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_filters.q b/ql/src/test/queries/clientpositive/vector_join_filters.q
new file mode 100644
index 0000000..adf525c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_filters.q
@@ -0,0 +1,38 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/queries/clientpositive/vector_join_nulls.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_nulls.q b/ql/src/test/queries/clientpositive/vector_join_nulls.q
new file mode 100644
index 0000000..6cfb7a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_nulls.q
@@ -0,0 +1,33 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/d421201c/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 098d002..62ad9ee 100644
--- a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -3,6 +3,8 @@ set hive.auto.convert.join=true;
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=10000;
 
+-- SORT_QUERY_RESULTS
+
 drop table if exists TJOIN1;
 drop table if exists TJOIN2;
 create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc;


Mime
View raw message