hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1667352 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: optimizer/ConvertJoinMapJoin.java optimizer/ReduceSinkMapJoinProc.java optimizer/TezBucketJoinProcCtx.java plan/CommonMergeJoinDesc.java plan/MapJoinDesc.java
Date Tue, 17 Mar 2015 16:27:31 GMT
Author: hashutosh
Date: Tue Mar 17 16:27:30 2015
New Revision: 1667352

URL: http://svn.apache.org/r1667352
Log:
HIVE-9966 : Get rid of customBucketMapJoin field from MapJoinDesc (Ashutosh Chauhan via Vikram Dixit)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TezBucketJoinProcCtx.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java?rev=1667352&r1=1667351&r2=1667352&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java Tue Mar 17 16:27:30 2015
@@ -93,7 +93,7 @@ public class ConvertJoinMapJoin implemen
         return retval;
       } else {
         int pos = 0; // it doesn't matter which position we use in this case.
-        convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
+        convertJoinSMBJoin(joinOp, context, pos, 0, false);
         return null;
       }
     }
@@ -135,7 +135,7 @@ public class ConvertJoinMapJoin implemen
       } else {
         // only case is full outer join with SMB enabled which is not possible. Convert to regular
         // join.
-        convertJoinSMBJoin(joinOp, context, 0, 0, false, false);
+        convertJoinSMBJoin(joinOp, context, 0, 0, false);
         return null;
       }
     }
@@ -155,7 +155,7 @@ public class ConvertJoinMapJoin implemen
       // we are just converting to a common merge join operator. The shuffle
       // join in map-reduce case.
       int pos = 0; // it doesn't matter which position we use in this case.
-      convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
+      convertJoinSMBJoin(joinOp, context, pos, 0, false);
       return null;
     }
 
@@ -180,7 +180,7 @@ public class ConvertJoinMapJoin implemen
     // map join either based on the size. Check if we can convert to SMB join.
     if ((context.conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN) == false)
         || (joinOp.getOpTraits().getNumReduceSinks() >= 2)) {
-      convertJoinSMBJoin(joinOp, context, 0, 0, false, false);
+      convertJoinSMBJoin(joinOp, context, 0, 0, false);
       return null;
     }
     Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
@@ -188,7 +188,7 @@ public class ConvertJoinMapJoin implemen
       String selector = HiveConf.getVar(context.parseContext.getConf(),
           HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
       bigTableMatcherClass =
-          (Class<? extends BigTableSelectorForAutoSMJ>) JavaUtils.loadClass(selector);
+          JavaUtils.loadClass(selector);
     } catch (ClassNotFoundException e) {
       throw new SemanticException(e.getMessage());
     }
@@ -210,18 +210,18 @@ public class ConvertJoinMapJoin implemen
       // we are just converting to a common merge join operator. The shuffle
       // join in map-reduce case.
       int pos = 0; // it doesn't matter which position we use in this case.
-      convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
+      convertJoinSMBJoin(joinOp, context, pos, 0, false);
       return null;
     }
 
     if (checkConvertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
       convertJoinSMBJoin(joinOp, context, mapJoinConversionPos,
-          tezBucketJoinProcCtx.getNumBuckets(), tezBucketJoinProcCtx.isSubQuery(), true);
+          tezBucketJoinProcCtx.getNumBuckets(), true);
     } else {
       // we are just converting to a common merge join operator. The shuffle
       // join in map-reduce case.
       int pos = 0; // it doesn't matter which position we use in this case.
-      convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
+      convertJoinSMBJoin(joinOp, context, pos, 0, false);
     }
     return null;
   }
@@ -229,7 +229,7 @@ public class ConvertJoinMapJoin implemen
   // replaces the join operator with a new CommonJoinOperator, removes the
   // parent reduce sinks
   private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context,
-      int mapJoinConversionPos, int numBuckets, boolean isSubQuery, boolean adjustParentsChildren)
+      int mapJoinConversionPos, int numBuckets, boolean adjustParentsChildren)
       throws SemanticException {
     MapJoinDesc mapJoinDesc = null;
     if (adjustParentsChildren) {
@@ -253,7 +253,7 @@ public class ConvertJoinMapJoin implemen
 
     CommonMergeJoinOperator mergeJoinOp =
         (CommonMergeJoinOperator) OperatorFactory.get(new CommonMergeJoinDesc(numBuckets,
-            isSubQuery, mapJoinConversionPos, mapJoinDesc), joinOp.getSchema());
+            mapJoinConversionPos, mapJoinDesc), joinOp.getSchema());
     int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks();
     OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets, joinOp
         .getOpTraits().getSortCols(), numReduceSinks);
@@ -363,8 +363,6 @@ public class ConvertJoinMapJoin implemen
     Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
     bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
     joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
-    LOG.info("Setting legacy map join to " + (!tezBucketJoinProcCtx.isSubQuery()));
-    joinDesc.setCustomBucketMapJoin(!tezBucketJoinProcCtx.isSubQuery());
 
     return true;
   }
@@ -405,13 +403,10 @@ public class ConvertJoinMapJoin implemen
       }
     }
 
-    boolean isSubQuery = false;
     if (numBuckets < 0) {
-      isSubQuery = true;
       numBuckets = bigTableRS.getConf().getNumReducers();
     }
     tezBucketJoinProcCtx.setNumBuckets(numBuckets);
-    tezBucketJoinProcCtx.setIsSubQuery(isSubQuery);
     LOG.info("We can convert the join to an SMB join.");
     return true;
   }
@@ -457,13 +452,10 @@ public class ConvertJoinMapJoin implemen
      * this is the case when the big table is a sub-query and is probably already bucketed by the
      * join column in say a group by operation
      */
-    boolean isSubQuery = false;
     if (numBuckets < 0) {
-      isSubQuery = true;
       numBuckets = rs.getConf().getNumReducers();
     }
     tezBucketJoinProcCtx.setNumBuckets(numBuckets);
-    tezBucketJoinProcCtx.setIsSubQuery(isSubQuery);
     return true;
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java?rev=1667352&r1=1667351&r2=1667352&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java Tue Mar 17 16:27:30 2015
@@ -31,8 +31,10 @@ import org.apache.hadoop.hive.ql.exec.Ha
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -64,7 +66,7 @@ public class ReduceSinkMapJoinProc imple
 
   /* (non-Javadoc)
    * This processor addresses the RS-MJ case that occurs in tez on the small/hash
-   * table side of things. The work that RS will be a part of must be connected 
+   * table side of things. The work that RS will be a part of must be connected
    * to the MJ work via be a broadcast edge.
    * We should not walk down the tree when we encounter this pattern because:
    * the type of work (map work or reduce work) needs to be determined
@@ -91,7 +93,7 @@ public class ReduceSinkMapJoinProc imple
     parentRS.setSkipTag(true);
     // remember the original parent list before we start modifying it.
     if (!context.mapJoinParentMap.containsKey(mapJoinOp)) {
-      List<Operator<?>> parents = new ArrayList(mapJoinOp.getParentOperators());
+      List<Operator<?>> parents = new ArrayList<Operator<?>>(mapJoinOp.getParentOperators());
       context.mapJoinParentMap.put(mapJoinOp, parents);
     }
 
@@ -173,9 +175,12 @@ public class ReduceSinkMapJoinProc imple
       parentRS.getConf().setReducerTraits(EnumSet.of(FIXED));
 
       numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0];
-      if (joinConf.getCustomBucketMapJoin()) {
+      Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstream(mapJoinOp.getParentOperators()
+          .get(joinConf.getPosBigTable()), TableScanOperator.class);
+
+      if (rootOp instanceof TableScanOperator) { // we will run in mapper
         edgeType = EdgeType.CUSTOM_EDGE;
-      } else {
+      } else { // we will run in reducer
         edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
       }
     }
@@ -218,8 +223,8 @@ public class ReduceSinkMapJoinProc imple
     }
     linkWorkMap.put(parentWork, edgeProp);
     context.linkOpWithWorkMap.put(mapJoinOp, linkWorkMap);
-    
-    List<ReduceSinkOperator> reduceSinks 
+
+    List<ReduceSinkOperator> reduceSinks
       = context.linkWorkWithReduceSinkMap.get(parentWork);
     if (reduceSinks == null) {
       reduceSinks = new ArrayList<ReduceSinkOperator>();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TezBucketJoinProcCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TezBucketJoinProcCtx.java?rev=1667352&r1=1667351&r2=1667352&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TezBucketJoinProcCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TezBucketJoinProcCtx.java Tue Mar 17 16:27:30 2015
@@ -18,28 +18,15 @@
 
 package org.apache.hadoop.hive.ql.optimizer;
 
-import java.util.List;
-import java.util.Map;
-
 import org.apache.hadoop.hive.conf.HiveConf;
 
 public class TezBucketJoinProcCtx extends BucketJoinProcCtx {
-  // determines if we need to use custom edge or one-to-one edge
-  boolean isSubQuery = false;
   int numBuckets = -1;
 
   public TezBucketJoinProcCtx(HiveConf conf) {
     super(conf);
   }
 
-  public void setIsSubQuery (boolean isSubQuery) {
-    this.isSubQuery = isSubQuery;
-  }
-
-  public boolean isSubQuery () {
-    return isSubQuery;
-  }
-
   public void setNumBuckets(int numBuckets) {
     this.numBuckets = numBuckets;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java?rev=1667352&r1=1667351&r2=1667352&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java Tue Mar 17 16:27:30 2015
@@ -24,24 +24,18 @@ import java.io.Serializable;
 public class CommonMergeJoinDesc extends MapJoinDesc implements Serializable {
   private static final long serialVersionUID = 1L;
   private int numBuckets;
-  private boolean isSubQuery;
   private int mapJoinConversionPos;
 
   CommonMergeJoinDesc() {
   }
 
-  public CommonMergeJoinDesc(int numBuckets, boolean isSubQuery, int mapJoinConversionPos,
+  public CommonMergeJoinDesc(int numBuckets, int mapJoinConversionPos,
       MapJoinDesc joinDesc) {
     super(joinDesc);
     this.numBuckets = numBuckets;
-    this.isSubQuery = isSubQuery;
     this.mapJoinConversionPos = mapJoinConversionPos;
   }
 
-  public boolean getCustomMerge() {
-    return isSubQuery;
-  }
-
   public int getNumBuckets() {
     return numBuckets;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1667352&r1=1667351&r2=1667352&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Tue Mar 17 16:27:30 2015
@@ -53,9 +53,6 @@ public class MapJoinDesc extends JoinDes
   private Map<Integer, String> parentToInput = new HashMap<Integer, String>();
   private Map<Integer, Long> parentKeyCounts = new HashMap<Integer, Long>();
 
-  // for tez. used to remember which type of a Bucket Map Join this is.
-  private boolean customBucketMapJoin;
-
   // table alias (small) --> input file name (big) --> target file names (small)
   private Map<String, Map<String, List<String>>> aliasBucketFileNameMapping;
   private Map<String, Integer> bigTableBucketNumMapping;
@@ -90,7 +87,6 @@ public class MapJoinDesc extends JoinDes
     this.dumpFilePrefix = clone.dumpFilePrefix;
     this.parentToInput = clone.parentToInput;
     this.parentKeyCounts = clone.parentKeyCounts;
-    this.customBucketMapJoin = clone.customBucketMapJoin;
   }
 
   public MapJoinDesc(final Map<Byte, List<ExprNodeDesc>> keys,
@@ -327,14 +323,7 @@ public class MapJoinDesc extends JoinDes
     return hashtableMemoryUsage;
   }
 
-  public void setCustomBucketMapJoin(boolean customBucketMapJoin) {
-    this.customBucketMapJoin = customBucketMapJoin;
-  }
-
-  public boolean getCustomBucketMapJoin() {
-    return this.customBucketMapJoin;
-  }
-
+  @Override
   public boolean isMapSideJoin() {
     return true;
   }



Mime
View raw message