hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r834649 [1/2] - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/hadoop/hive/ql/plan/ ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpos...
Date Tue, 10 Nov 2009 20:18:29 GMT
Author: namit
Date: Tue Nov 10 20:18:28 2009
New Revision: 834649

URL: http://svn.apache.org/viewvc?rev=834649&view=rev
Log:
HIVE-870. Add left semi join. (Ning Zhang via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q
    hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/semijoin.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Nov 10 20:18:28 2009
@@ -77,6 +77,8 @@
 
     HIVE-911. Add UDF WeekOfYear. (Paul Yang via zshao)
 
+    HIVE-870. Add left semi join. (Ning Zhang via namit)
+
   IMPROVEMENTS
 
     HIVE-760. Add version info to META-INF/MANIFEST.MF.

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Tue Nov 10 20:18:28 2009
@@ -74,6 +74,10 @@
     public void popObj() {
       curSize--;
     }
+    
+    public Object topObj() {
+      return objs[curSize-1];
+    }
   }
 
   transient protected int numAliases; // number of aliases
@@ -97,7 +101,6 @@
                                        // potential nulls for the concerned
                                        // aliases
   transient private ArrayList<ArrayList<Object>>[] dummyObjVectors;
-  transient private Stack<Iterator<ArrayList<Object>>> iterators;
   transient protected int totalSz; // total size of the composite object
   
   // keys are the column names. basically this maps the position of the column in 
@@ -217,9 +220,6 @@
       dummyObjVectors[pos] = values;
       pos++;
     }
-
-    iterators = new Stack<Iterator<ArrayList<Object>>>();
-    
     joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
     
     forwardCache = new Object[totalSz];
@@ -309,6 +309,31 @@
     }
     return resNulls;
   }
+  
+  /**
+   * Implement semi join operator.
+   */
+  private ArrayList<boolean[]> joinObjectsLeftSemiJoin(ArrayList<boolean[]> resNulls,
+                                                       ArrayList<boolean[]> inputNulls, 
+                                                       ArrayList<Object> newObj,
+                                                       IntermediateObject intObj, 
+                                                       int left, 
+                                                       boolean newObjNull) {
+    if (newObjNull)
+      return resNulls;
+    Iterator<boolean[]> nullsIter = inputNulls.iterator();
+    while (nullsIter.hasNext()) {
+      boolean[] oldNulls = nullsIter.next();
+      boolean oldObjNull = oldNulls[left];
+      if (!oldObjNull) {
+        boolean[] newNulls = new boolean[intObj.getCurSize()];
+        copyOldArray(oldNulls, newNulls);
+        newNulls[oldNulls.length] = false;
+        resNulls.add(newNulls);
+      }
+    }
+    return resNulls;
+  }
 
   private ArrayList<boolean[]> joinObjectsLeftOuterJoin(
       ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
@@ -452,8 +477,8 @@
    * inner join. The outer joins are processed appropriately.
    */
   private ArrayList<boolean[]> joinObjects(ArrayList<boolean[]> inputNulls,
-                                        ArrayList<Object> newObj, IntermediateObject intObj, 
-                                        int joinPos, boolean firstRow) {
+                                         ArrayList<Object> newObj, IntermediateObject intObj, 
+                                         int joinPos, boolean firstRow) {
     ArrayList<boolean[]> resNulls = new ArrayList<boolean[]>();
     boolean newObjNull = newObj == dummyObj[joinPos] ? true : false;
     if (joinPos == 0) {
@@ -491,6 +516,10 @@
     else if (type == joinDesc.RIGHT_OUTER_JOIN)
       return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj,
                                        left, newObjNull, firstRow);
+    else if (type == joinDesc.LEFT_SEMI_JOIN)
+      return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj, 
+                                     left, newObjNull);
+      
     assert (type == joinDesc.FULL_OUTER_JOIN);
     return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left,
                                     newObjNull, firstRow);
@@ -506,20 +535,40 @@
   private void genObject(ArrayList<boolean[]> inputNulls, int aliasNum,
                          IntermediateObject intObj, boolean firstRow) throws HiveException {
     boolean childFirstRow = firstRow;
+    boolean skipping = false;
+    
     if (aliasNum < numAliases) {
-      Iterator<ArrayList<Object>> aliasRes = storage.get(order[aliasNum])
-          .iterator();
-      iterators.push(aliasRes);
+    
+      // search for match in the rhs table
+      Iterator<ArrayList<Object>> aliasRes = storage.get(order[aliasNum]).iterator();
       while (aliasRes.hasNext()) {
+        
         ArrayList<Object> newObj = aliasRes.next();
+        
+        // check for skipping in case of left semi join
+        if (aliasNum > 0 &&
+            condn[aliasNum - 1].getType() ==  joinDesc.LEFT_SEMI_JOIN &&
+            newObj != dummyObj[aliasNum] ) { // successful match
+          skipping = true;
+        }
+        
         intObj.pushObj(newObj);
-        ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
-                                                 aliasNum, childFirstRow);
+        
+        // execute the actual join algorithm
+        ArrayList<boolean[]> newNulls =  joinObjects(inputNulls, newObj, intObj,
+                                                     aliasNum, childFirstRow);
+        
+        // recursively join against the other rhs tables
         genObject(newNulls, aliasNum + 1, intObj, firstRow);
+        
         intObj.popObj();
         firstRow = false;
+        
+        // if left-semi-join found a match, skip the rest of the rows in the rhs table of the semijoin
+        if ( skipping ) {
+          break;
+        }
       }
-      iterators.pop();
     } else {
       if (inputNulls == null)
         return;
@@ -530,7 +579,7 @@
       }
     }
   }
-
+ 
   /**
    * Forward a record of join results.
    * 
@@ -538,6 +587,8 @@
    */
   public void endGroup() throws HiveException {
     LOG.trace("Join Op: endGroup called: numValues=" + numAliases);
+    
+    
     checkAndGenObject();
   }
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java Tue Nov 10 20:18:28 2009
@@ -135,6 +135,7 @@
             while (true) {
               InspectableObject row = fetchOp.getNextRow();
               if (row == null) {
+                forwardOp.close(false);
                 break;
               }
               fetchOpRows++;

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Tue Nov 10 20:18:28 2009
@@ -22,6 +22,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Vector;
@@ -646,16 +647,25 @@
   }
   
   public String dump(int level) {
+    return dump(level, new HashSet<Integer>());
+  }
+  
+  public String dump(int level, HashSet<Integer> seenOpts) {
+    if ( seenOpts.contains(new Integer(id)))
+      return null;
+    seenOpts.add(new Integer(id));
+    
     StringBuilder s = new StringBuilder();
     String ls = getLevelString(level);
     s.append(ls);
     s.append("<" + getName() + ">");
     s.append("Id =" + id);
+    
     if (childOperators != null) {
       s.append(ls);
       s.append("  <Children>");
       for (Operator<? extends Serializable> o : childOperators) {
-        s.append(o.dump(level+2));
+        s.append(o.dump(level+2, seenOpts));
       }
       s.append(ls);
       s.append("  <\\Children>");
@@ -666,6 +676,7 @@
       s.append("  <Parent>");
       for (Operator<? extends Serializable> o : parentOperators) {
         s.append("Id = " + o.id + " ");
+        s.append(o.dump(level,seenOpts));
       }
       s.append("<\\Parent>");
     }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java Tue Nov 10 20:18:28 2009
@@ -62,4 +62,24 @@
   public String getName() {
     return (new Integer(super.getToken().getType())).toString();
   }
+  
+  public String dump() {
+    StringBuffer sb = new StringBuffer();
+    
+    sb.append('(');
+    sb.append(this.toString());
+    Vector<Node> children = getChildren();
+    if ( children != null ) {
+      for ( Node node : getChildren() ) {
+        if ( node instanceof ASTNode ) {
+          sb.append(((ASTNode) node).dump());
+        } else {
+          sb.append("NON-ASTNODE!!");
+        }
+      }
+    }
+    sb.append(')');
+    return sb.toString();
+  }
+  
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Tue Nov 10 20:18:28 2009
@@ -143,6 +143,7 @@
 TOK_USERSCRIPTCOLSCHEMA;
 TOK_RECORDREADER;
 TOK_RECORDWRITER;
+TOK_LEFTSEMIJOIN;
 }
 
 
@@ -891,9 +892,10 @@
 @after { msgs.pop(); }
     :
       KW_JOIN                     -> TOK_JOIN
-    | KW_LEFT KW_OUTER KW_JOIN    -> TOK_LEFTOUTERJOIN
+    | KW_LEFT  KW_OUTER KW_JOIN   -> TOK_LEFTOUTERJOIN
     | KW_RIGHT KW_OUTER KW_JOIN   -> TOK_RIGHTOUTERJOIN
-    | KW_FULL KW_OUTER KW_JOIN    -> TOK_FULLOUTERJOIN
+    | KW_FULL  KW_OUTER KW_JOIN   -> TOK_FULLOUTERJOIN
+    | KW_LEFT  KW_SEMI  KW_JOIN   -> TOK_LEFTSEMIJOIN
     ;
 
 fromSource
@@ -1436,6 +1438,7 @@
 KW_TRIGGER: 'TRIGGER';
 KW_RECORDREADER: 'RECORDREADER';
 KW_RECORDWRITER: 'RECORDWRITER';
+KW_SEMI: 'SEMI';
 
 
 // Operators

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java Tue Nov 10 20:18:28 2009
@@ -18,8 +18,11 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.HashMap;
 import java.util.Vector;
 import java.util.List;
+import java.util.ArrayList;
+import java.util.Map.Entry;
 
 /**
  * Internal representation of the join tree
@@ -35,6 +38,10 @@
   private int           nextTag;
   private joinCond[]    joinCond;
   private boolean       noOuterJoin;
+  private boolean       noSemiJoin;
+  
+  // keeps track of the right-hand-side table name of the left-semi-join, and its list of join keys
+  private HashMap<String, ArrayList<ASTNode>> rhsSemijoin;
   
   // join conditions
   private Vector<Vector<ASTNode>> expressions;
@@ -52,7 +59,12 @@
   /**
    * constructor 
    */
-  public QBJoinTree() { nextTag = 0;}
+  public QBJoinTree() { 
+    nextTag = 0;
+    noOuterJoin = true;
+    noSemiJoin  = true;
+    rhsSemijoin = new HashMap<String, ArrayList<ASTNode>>();
+  }
 
   /**
    * returns left alias if any - this is used for merging later on
@@ -133,20 +145,28 @@
   public void setNoOuterJoin(boolean noOuterJoin) {
     this.noOuterJoin = noOuterJoin;
   }
+  
+  public boolean getNoSemiJoin() {
+    return noSemiJoin;
+  }
 
-	/**
-	 * @return the filters
-	 */
-	public Vector<Vector<ASTNode>> getFilters() {
-		return filters;
-	}
-
-	/**
-	 * @param filters the filters to set
-	 */
-	public void setFilters(Vector<Vector<ASTNode>> filters) {
-		this.filters = filters;
-	}
+  public void setNoSemiJoin(boolean semi) {
+    this.noSemiJoin = semi;
+  }
+
+  /**
+   * @return the filters
+   */
+  public Vector<Vector<ASTNode>> getFilters() {
+    return filters;
+  }
+
+  /**
+   * @param filters the filters to set
+   */
+  public void setFilters(Vector<Vector<ASTNode>> filters) {
+    this.filters = filters;
+  }
 
   /**
    * @return the mapSidejoin
@@ -183,6 +203,66 @@
   public void setStreamAliases(List<String> streamAliases) {
     this.streamAliases = streamAliases;
   }
+  
+  /**
+   * Insert only a key (with no columns yet) into the semijoin table-name-to-column-names map. 
+   * @param alias table name alias.
+   */
+  public void addRHSSemijoin(String alias) {
+    if ( ! rhsSemijoin.containsKey(alias) ) {
+      rhsSemijoin.put(alias, null);
+    }
+  }
+  
+  /**
+   * Remember the mapping of table alias to set of columns.
+   * @param alias
+   * @param columns
+   */
+  public void addRHSSemijoinColumns(String alias, ArrayList<ASTNode> columns) {
+    ArrayList<ASTNode> cols = rhsSemijoin.get(alias);
+    if ( cols == null ) {
+      rhsSemijoin.put(alias, columns);
+    } else {
+      cols.addAll(columns);
+    }
+  }
+  
+  /**
+   * Remember the mapping of table alias to set of columns.
+   * @param alias
+   * @param column
+   */
+  public void addRHSSemijoinColumns(String alias, ASTNode column) {
+    ArrayList<ASTNode> cols = rhsSemijoin.get(alias);
+    if ( cols == null ) {
+      cols = new ArrayList<ASTNode>();
+      cols.add(column);
+      rhsSemijoin.put(alias, cols);
+    } else {
+      cols.add(column);
+    }
+  }
+  
+  public ArrayList<ASTNode> getRHSSemijoinColumns(String alias) {
+    return rhsSemijoin.get(alias);
+  }
+  
+  /**
+   * Merge the rhs tables from another join tree.
+   * @param src the source join tree
+   */
+  public void mergeRHSSemijoin(QBJoinTree src) {
+    for (Entry<String, ArrayList<ASTNode>> e: src.rhsSemijoin.entrySet()) {
+      String key = e.getKey();
+      ArrayList<ASTNode> value = this.rhsSemijoin.get(key);
+      if ( value == null ) {
+        this.rhsSemijoin.put(key, e.getValue());
+      } else {
+        value.addAll(e.getValue());
+      }
+    }
+  }
 }
 
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Nov 10 20:18:28 2009
@@ -426,10 +426,11 @@
 
   private boolean isJoinToken(ASTNode node)
   {
-    if ((node.getToken().getType() == HiveParser.TOK_JOIN) ||
-        (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) ||
+    if ((node.getToken().getType() == HiveParser.TOK_JOIN)           ||
+        (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)  ||
         (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) ||
-        (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) ||
+        (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)  ||
+        (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)   ||
         (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN))
       return true;
 
@@ -725,7 +726,8 @@
 
   @SuppressWarnings("nls")
   private void parseJoinCondPopulateAlias(QBJoinTree joinTree,
-      ASTNode condn, Vector<String> leftAliases, Vector<String> rightAliases)
+      ASTNode condn, Vector<String> leftAliases, Vector<String> rightAliases,
+      ArrayList<String> fields)
       throws SemanticException {
     // String[] allAliases = joinTree.getAllAliases();
     switch (condn.getToken().getType()) {
@@ -744,9 +746,14 @@
       }
       break;
 
+    case HiveParser.Identifier:
+      // it may be a field name, return the identifier and let the caller decide whether it is or not
+      if ( fields != null ) {
+        fields.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
+      }
+      break;
     case HiveParser.Number:
     case HiveParser.StringLiteral:
-    case HiveParser.Identifier:
     case HiveParser.TOK_CHARSETLITERAL:
     case HiveParser.KW_TRUE:
     case HiveParser.KW_FALSE:
@@ -756,19 +763,42 @@
       // check all the arguments
       for (int i = 1; i < condn.getChildCount(); i++)
         parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
-            leftAliases, rightAliases);
+            leftAliases, rightAliases, null);
       break;
 
     default:
       // This is an operator - so check whether it is unary or binary operator
       if (condn.getChildCount() == 1)
         parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
-            leftAliases, rightAliases);
-      else if (condn.getChildCount() == 2) {
-        parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
-            leftAliases, rightAliases);
-        parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
-            leftAliases, rightAliases);
+            leftAliases, rightAliases, null);
+      else if (condn.getChildCount() == 2) { 
+        
+        ArrayList<String> fields1 = null;
+        // if it is a dot operator, remember the field name of the rhs of the left semijoin
+        if (joinTree.getNoSemiJoin() == false &&
+            condn.getToken().getText().equals("." )) {
+          // get the semijoin rhs table name and field name
+          fields1 = new ArrayList<String>();
+          int rhssize = rightAliases.size();
+          parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+              leftAliases, rightAliases, null);
+          String rhsAlias = null;
+          
+          if ( rightAliases.size() > rhssize ) { // the new table is rhs table
+            rhsAlias = rightAliases.get(rightAliases.size()-1);
+          }
+          
+          parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
+              leftAliases, rightAliases, fields1);
+          if ( rhsAlias != null && fields1.size() > 0 ) {
+            joinTree.addRHSSemijoinColumns(rhsAlias, condn);
+          }
+        } else {
+          parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+              leftAliases, rightAliases, null);
+          parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
+              leftAliases, rightAliases, fields1);
+        }
       } else
         throw new SemanticException(condn.toStringTree() + " encountered with "
             + condn.getChildCount() + " children");
@@ -827,12 +857,12 @@
       ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
       Vector<String> leftCondAl1 = new Vector<String>();
       Vector<String> leftCondAl2 = new Vector<String>();
-      parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2);
+      parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null);
 
       ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
       Vector<String> rightCondAl1 = new Vector<String>();
       Vector<String> rightCondAl2 = new Vector<String>();
-      parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2);
+      parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null);
 
       // is it a filter or a join condition
       if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) ||
@@ -877,7 +907,7 @@
       }
 
       for (int ci=childrenBegin; ci<joinCond.getChildCount(); ci++)
-        parseJoinCondPopulateAlias(joinTree, (ASTNode)joinCond.getChild(ci), leftAlias.get(ci-childrenBegin), rightAlias.get(ci-childrenBegin));
+        parseJoinCondPopulateAlias(joinTree, (ASTNode)joinCond.getChild(ci), leftAlias.get(ci-childrenBegin), rightAlias.get(ci-childrenBegin), null);
 
       boolean leftAliasNull = true;
       for (Vector<String> left : leftAlias) {
@@ -2951,61 +2981,67 @@
     return output;
   }
 
-  private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right)
+  private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, 
+                                           HashSet<Integer> omitOpts)
     throws SemanticException {
+    
     RowResolver outputRS = new RowResolver();
     ArrayList<String> outputColumnNames = new ArrayList<String>();
     // all children are base classes
     Operator<?>[] rightOps = new Operator[right.length];
-    int pos = 0;
     int outputPos = 0;
 
     Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
     HashMap<Byte, List<exprNodeDesc>> exprMap = new HashMap<Byte, List<exprNodeDesc>>();
     Map<String, exprNodeDesc> colExprMap = new HashMap<String, exprNodeDesc>();
     HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
-    for (Operator input : right)
-    {
-      ArrayList<exprNodeDesc> keyDesc = new ArrayList<exprNodeDesc>();
+    
+    for ( int pos = 0; pos < right.length; ++pos ) {
+      
+      Operator input = right[pos];
       if (input == null)
         input = left;
+      
+      ArrayList<exprNodeDesc> keyDesc = new ArrayList<exprNodeDesc>();
       Byte tag = Byte.valueOf((byte)(((reduceSinkDesc)(input.getConf())).getTag()));
-      RowResolver inputRS = opParseCtx.get(input).getRR();
-      Iterator<String> keysIter = inputRS.getTableNames().iterator();
-      Set<String> aliases = posToAliasMap.get(pos);
-      if(aliases == null) {
-        aliases = new HashSet<String>();
-        posToAliasMap.put(pos, aliases);
-      }
-
-      while (keysIter.hasNext())
-      {
-        String key = keysIter.next();
-        aliases.add(key);
-        HashMap<String, ColumnInfo> map = inputRS.getFieldMap(key);
-        Iterator<String> fNamesIter = map.keySet().iterator();
-        while (fNamesIter.hasNext())
-        {
-          String field = fNamesIter.next();
-          ColumnInfo valueInfo = inputRS.get(key, field);
-          keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(),
-                                             valueInfo.getInternalName(),
-                                             valueInfo.getTabAlias(),
-                                             valueInfo.getIsPartitionCol()));
-          if (outputRS.get(key, field) == null) {
-            String colName = getColumnInternalName(outputPos);
-            outputPos++;
-            outputColumnNames.add(colName);
-            colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1));
-            outputRS.put(key, field, new ColumnInfo(colName,
-                                                    valueInfo.getType(), key, false));
-            reversedExprs.put(colName, tag);
+      
+      // check whether this input operator produces output
+      if ( omitOpts == null || !omitOpts.contains(pos) ) {
+        // prepare output descriptors for the input opt
+        RowResolver inputRS = opParseCtx.get(input).getRR();
+  	    Iterator<String> keysIter = inputRS.getTableNames().iterator();
+        Set<String> aliases = posToAliasMap.get(pos);
+    	  if(aliases == null) {
+          aliases = new HashSet<String>();
+      	  posToAliasMap.put(pos, aliases);
+      	}
+	      while (keysIter.hasNext()) {
+          String key = keysIter.next();
+          aliases.add(key);
+          HashMap<String, ColumnInfo> map = inputRS.getFieldMap(key);
+          Iterator<String> fNamesIter = map.keySet().iterator();
+      	  while (fNamesIter.hasNext()) {
+        	  String field = fNamesIter.next();
+            ColumnInfo valueInfo = inputRS.get(key, field);
+            keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(),
+                                               valueInfo.getInternalName(),
+                                               valueInfo.getTabAlias(),
+                                               valueInfo.getIsPartitionCol()));
+            
+            if (outputRS.get(key, field) == null) {
+              String colName = getColumnInternalName(outputPos);
+              outputPos++;
+              outputColumnNames.add(colName);
+              colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1));
+              outputRS.put(key, field, new ColumnInfo(colName,
+                                                      valueInfo.getType(), key, false));
+              reversedExprs.put(colName, tag);
+            }
           }
         }
-      }
-
+      } 
       exprMap.put(tag, keyDesc);
-      rightOps[pos++] = input;
+      rightOps[pos] = input;
     }
 
     org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = new org.apache.hadoop.hive.ql.plan.joinCond[join.getJoinCond().length];
@@ -3101,10 +3137,30 @@
     }
 
     Operator[] srcOps = new Operator[joinTree.getBaseSrc().length];
+    
+    HashSet<Integer> omitOpts = null;    // set of input to the join that should be omitted by the output
     int pos = 0;
     for (String src : joinTree.getBaseSrc()) {
       if (src != null) {
         Operator srcOp = map.get(src);
+        
+        // for left-semi join, generate an additional selection & group-by operator before ReduceSink
+        ArrayList<ASTNode> fields = joinTree.getRHSSemijoinColumns(src);
+        if ( fields != null ) {
+          // the RHS table columns should not be output from the join
+          if ( omitOpts == null ) {
+            omitOpts = new HashSet<Integer>();
+          }
+          omitOpts.add(pos);
+          
+          // generate a selection operator for group-by keys only
+          srcOp = insertSelectForSemijoin(fields, srcOp);
+          
+          // generate a groupby operator (HASH mode) for a map-side partial aggregation for semijoin
+          srcOp = genMapGroupByForSemijoin(qb, fields, srcOp, groupByDesc.Mode.HASH);
+        }
+        
+        // generate a ReduceSink operator for the join
         srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, src, pos);
         pos++;
       } else {
@@ -3116,10 +3172,139 @@
     // Type checking and implicit type conversion for join keys
     genJoinOperatorTypeCheck(joinSrcOp, srcOps);
 
-    JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps);
+    JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts);
     joinContext.put(joinOp, joinTree);
     return joinOp;
   }
+  
+  /**
+   * Construct a selection operator for semijoin that filter out all fields other than the group by keys.
+   * 
+   * @param fields list of fields need to be output
+   * @param input input operator
+   * @return the selection operator.
+   * @throws SemanticException
+   */
+  private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields, Operator input)
+    throws SemanticException {
+    
+    RowResolver             inputRR = opParseCtx.get(input).getRR();
+    ArrayList<exprNodeDesc> colList = new ArrayList<exprNodeDesc>();
+    ArrayList<String>   columnNames = new ArrayList<String>();
+    
+    // construct the list of columns that need to be projected 
+    for (ASTNode field: fields) {
+      exprNodeColumnDesc exprNode = (exprNodeColumnDesc) genExprNodeDesc(field, inputRR);
+      colList.add(exprNode);
+      columnNames.add(exprNode.getColumn());
+    }
+    
+    // create selection operator
+    Operator output = putOpInsertMap(
+                        OperatorFactory.getAndMakeChild(
+                          new selectDesc(colList, columnNames, false),  
+                          new RowSchema(inputRR.getColumnInfos()), 
+                          input), 
+                        inputRR);
+    
+    output.setColumnExprMap(input.getColumnExprMap());
+    return output;
+  }
+
+  private Operator genMapGroupByForSemijoin(QB qb, 
+                                            ArrayList<ASTNode> fields,   // the ASTNode of the join key "tab.col"
+                                            Operator inputOperatorInfo, 
+                                            groupByDesc.Mode mode)
+    throws SemanticException {
+    
+    RowResolver     groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR();
+    RowResolver    groupByOutputRowResolver = new RowResolver();
+    ArrayList<exprNodeDesc>     groupByKeys = new ArrayList<exprNodeDesc>();
+    ArrayList<String>     outputColumnNames = new ArrayList<String>();
+    ArrayList<aggregationDesc> aggregations = new ArrayList<aggregationDesc>();
+    Map<String, exprNodeDesc>    colExprMap = new HashMap<String, exprNodeDesc>();
+    QBParseInfo                   parseInfo = qb.getParseInfo();
+    
+    groupByOutputRowResolver.setIsExprResolver(true); // join keys should only be columns but not be expressions
+    
+    for (int i = 0; i < fields.size(); ++i) {
+      // get the group by keys to ColumnInfo
+      ASTNode colName = fields.get(i);
+      exprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
+      groupByKeys.add(grpByExprNode);
+      
+      // generate output column names
+      String field = getColumnInternalName(i);
+      outputColumnNames.add(field);
+      ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
+      groupByOutputRowResolver.put("",  colName.toStringTree(), colInfo2);
+      
+      // establish mapping from the output column to the input column
+      colExprMap.put(field, grpByExprNode);
+    }
+
+    // Generate group-by operator
+    Operator op = putOpInsertMap(
+                    OperatorFactory.getAndMakeChild(
+                      new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false),
+                      new RowSchema(groupByOutputRowResolver.getColumnInfos()),
+                      inputOperatorInfo),
+                    groupByOutputRowResolver);
+    
+    op.setColumnExprMap(colExprMap);
+    return op;
+  }
+  
+  private Operator genReduceSinkForSemijoin(QB qb, 
+                                            ArrayList<ASTNode> fields,  // semijoin key for the rhs table
+                                            Operator inputOperatorInfo) 
+    throws SemanticException {
+    
+    RowResolver  reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR();
+    QBParseInfo                   parseInfo = qb.getParseInfo();
+    RowResolver reduceSinkOutputRowResolver = new RowResolver();
+    Map<String, exprNodeDesc>    colExprMap = new HashMap<String, exprNodeDesc>();
+    ArrayList<exprNodeDesc>      reduceKeys = new ArrayList<exprNodeDesc>();
+    List<String>          outputColumnNames = new ArrayList<String>();
+    
+    reduceSinkOutputRowResolver.setIsExprResolver(true);
+    
+    // Pre-compute group-by keys and store in reduceKeys
+    for (int i = 0; i < fields.size(); ++i) {
+      // based on the input row resolver, resolve the column names and construct expression node descriptors
+      ASTNode colName = fields.get(i);
+      exprNodeDesc inputExpr = genExprNodeDesc(colName, reduceSinkInputRowResolver);
+      
+      reduceKeys.add(inputExpr);
+      
+      // create new ColumnInfos for the groupby columns and put them into the output row resolver
+      if (reduceSinkOutputRowResolver.get("", colName.toStringTree()) == null) {
+        outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
+        String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1);
+        ColumnInfo colInfo1 = new ColumnInfo(field,
+                                             reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), 
+                                             null, false);
+        reduceSinkOutputRowResolver.put("", colName.toStringTree(), colInfo1);
+        colExprMap.put(colInfo1.getInternalName(), inputExpr);
+      } else {
+        throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg());
+      }
+    }
+    
+    // SEMIJOIN HAS NO AGGREGATIONS, and we don't really use reduce values, so leave it as an empty list
+    ArrayList<exprNodeDesc> reduceValues = new ArrayList<exprNodeDesc>();
+    int numPartitionFields = fields.size();
+
+    // finally generate the ReduceSink operator
+    ReduceSinkOperator rsOp = (ReduceSinkOperator)  putOpInsertMap(
+        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, -1),
+                                        new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()),
+                                        inputOperatorInfo),
+        reduceSinkOutputRowResolver);
+    rsOp.setColumnExprMap(colExprMap);
+    
+    return rsOp;
+  }
 
   private void genJoinOperatorTypeCheck(Operator left, Operator[] right) throws SemanticException {
     // keys[i] -> ArrayList<exprNodeDesc> for the i-th join operator key list
@@ -3311,26 +3496,28 @@
       throws SemanticException {
     QBJoinTree joinTree = new QBJoinTree();
     joinCond[] condn = new joinCond[1];
-
-    if (joinParseTree.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
-    {
+    
+    switch (joinParseTree.getToken().getType() ) {
+    case HiveParser.TOK_LEFTOUTERJOIN:
       joinTree.setNoOuterJoin(false);
       condn[0] = new joinCond(0, 1, joinType.LEFTOUTER);
-    }
-    else if (joinParseTree.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
-    {
+      break;
+    case HiveParser.TOK_RIGHTOUTERJOIN:
       joinTree.setNoOuterJoin(false);
       condn[0] = new joinCond(0, 1, joinType.RIGHTOUTER);
-    }
-    else if (joinParseTree.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
-    {
+      break;
+    case HiveParser.TOK_FULLOUTERJOIN:
       joinTree.setNoOuterJoin(false);
       condn[0] = new joinCond(0, 1, joinType.FULLOUTER);
-    }
-    else
-    {
+      break;
+    case HiveParser.TOK_LEFTSEMIJOIN:
+      joinTree.setNoSemiJoin(false);
+      condn[0] = new joinCond(0, 1, joinType.LEFTSEMI);
+      break;
+    default:
       condn[0] = new joinCond(0, 1, joinType.INNER);
       joinTree.setNoOuterJoin(true);
+      break;
     }
 
     joinTree.setJoinCond(condn);
@@ -3376,6 +3563,10 @@
         children = new String[2];
       children[1] = alias;
       joinTree.setBaseSrc(children);
+      // remember rhs table for semijoin
+      if (joinTree.getNoSemiJoin() == false) {
+        joinTree.addRHSSemijoin(alias);
+      }
     } else
       assert false;
 
@@ -3493,6 +3684,13 @@
     else
       target.setNoOuterJoin(false);
 
+    if (node.getNoSemiJoin() && target.getNoSemiJoin())
+      target.setNoSemiJoin(true);
+    else
+      target.setNoSemiJoin(false);
+
+    target.mergeRHSSemijoin(node);
+    
     joinCond[] nodeCondns = node.getJoinCond();
     int nodeCondnsSize = nodeCondns.length;
     joinCond[] targetCondns = target.getJoinCond();

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/joinType.java Tue Nov 10 20:18:28 2009
@@ -18,4 +18,4 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
-public enum joinType {INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, UNIQUE};
+public enum joinType {INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, UNIQUE, LEFTSEMI};

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinCond.java Tue Nov 10 20:18:28 2009
@@ -20,6 +20,7 @@
 
 import java.io.Serializable;
 import java.util.Vector;
+import org.apache.hadoop.hive.ql.parse.joinType;
 
 /**
  * Join conditions Descriptor implementation.
@@ -44,19 +45,28 @@
     this.left       = condn.getLeft();
     this.right      = condn.getRight();
     this.preserved  = condn.getPreserved();
-    org.apache.hadoop.hive.ql.parse.joinType itype = condn.getJoinType();
-    if (itype == org.apache.hadoop.hive.ql.parse.joinType.INNER)
+    switch ( condn.getJoinType() ) {
+    case INNER:
       this.type = joinDesc.INNER_JOIN;
-    else if (itype == org.apache.hadoop.hive.ql.parse.joinType.LEFTOUTER)
+      break;
+    case LEFTOUTER:
       this.type = joinDesc.LEFT_OUTER_JOIN;
-    else if (itype == org.apache.hadoop.hive.ql.parse.joinType.RIGHTOUTER)
+      break;
+    case RIGHTOUTER:
       this.type = joinDesc.RIGHT_OUTER_JOIN;
-    else if (itype == org.apache.hadoop.hive.ql.parse.joinType.FULLOUTER)
+      break;
+    case FULLOUTER:
       this.type = joinDesc.FULL_OUTER_JOIN;
-    else if (itype == org.apache.hadoop.hive.ql.parse.joinType.UNIQUE)
+      break;
+    case UNIQUE:
       this.type = joinDesc.UNIQUE_JOIN;
-    else
+      break;
+    case LEFTSEMI:
+      this.type = joinDesc.LEFT_SEMI_JOIN;
+      break;
+    default:
       assert false;
+    }
   }
   
   /**
@@ -117,8 +127,11 @@
     case joinDesc.UNIQUE_JOIN:
       sb.append("Unique Join");
       break;
+    case joinDesc.LEFT_SEMI_JOIN:
+      sb.append("Left Semi Join ");
+      break;
     default:
-      sb.append("Unknow Join");
+      sb.append("Unknow Join ");
       break;
     }
     

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java?rev=834649&r1=834648&r2=834649&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/joinDesc.java Tue Nov 10 20:18:28 2009
@@ -37,11 +37,12 @@
 @explain(displayName="Join Operator")
 public class joinDesc implements Serializable {
   private static final long serialVersionUID = 1L;
-  public static final int INNER_JOIN = 0;
-  public static final int LEFT_OUTER_JOIN = 1;
+  public static final int INNER_JOIN       = 0;
+  public static final int LEFT_OUTER_JOIN  = 1;
   public static final int RIGHT_OUTER_JOIN = 2;
-  public static final int FULL_OUTER_JOIN = 3;
-  public static final int UNIQUE_JOIN = 4;
+  public static final int FULL_OUTER_JOIN  = 3;
+  public static final int UNIQUE_JOIN      = 4;
+  public static final int LEFT_SEMI_JOIN   = 5;
 
   // alias to key mapping
   private Map<Byte, List<exprNodeDesc>> exprs;

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin1.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,2 @@
+-- reference rhs of semijoin in select-clause
+select b.value from src a left semi join src b on (b.key = a.key and b.key = '100');

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin2.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,2 @@
+-- rhs table reference in the where clause
+select a.value from src a left semi join src b on a.key = b.key where b.value = 'val_18';

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin3.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,2 @@
+-- rhs table reference in group by
+select * from src a left semi join src b on a.key = b.key group by b.value;

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/semijoin4.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,3 @@
+-- rhs table is a view and reference the view in where clause
+select a.value from src a left semi join (select key , value from src where key > 100) b on a.key = b.key where b.value = 'val_108' ;
+

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/semijoin.q Tue Nov 10 20:18:28 2009
@@ -0,0 +1,83 @@
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+
+create table t1 as select cast(key as int) key, value from src where key <= 10;
+
+select * from t1 sort by key;
+
+create table t2 as select cast(2*key as int) key, value from t1;
+
+select * from t2 sort by key;
+
+create table t3 as select * from (select * from t1 union all select * from t2) b;
+select * from t3 sort by key, value;
+
+create table t4 (key int, value string);
+select * from t4;
+
+explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value;
+
+explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value;
+
+explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value;
+
+explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value;
+
+explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value;
+
+explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value;
+
+explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ;
+
+explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value;
+
+explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key;
+
+explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value;
+
+explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value;
+ 
+explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value;
+
+explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+
+explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+
+explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;
+
+explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key;
+
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;

Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin1.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:7 Invalid Table Alias or Column Reference b

Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin2.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:70 Invalid Table Alias or Column Reference b

Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin3.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:67 Invalid Table Alias or Column Reference b

Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out?rev=834649&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/semijoin4.q.out Tue Nov 10 20:18:28 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 2:112 Invalid Table Alias or Column Reference b



Mime
View raw message