hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1645089 - in /hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer: AbstractBucketJoinProc.java BucketJoinProcCtx.java MapJoinProcessor.java spark/SparkMapJoinOptimizer.java
Date Fri, 12 Dec 2014 22:54:40 GMT
Author: xuefu
Date: Fri Dec 12 22:54:39 2014
New Revision: 1645089

URL: http://svn.apache.org/r1645089
Log:
HIVE-9081: Bucket mapjoin should use the new alias in posToAliasMap [Spark Branch] (Jimmy via Xuefu)

Modified:
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketJoinProcCtx.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1645089&r1=1645088&r2=1645089&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Fri Dec 12 22:54:39 2014
@@ -27,6 +27,7 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.commons.logging.Log;
@@ -204,6 +205,8 @@ abstract public class AbstractBucketJoin
     HashMap<String, Operator<? extends OperatorDesc>> topOps = pGraphContext.getTopOps();
     Map<TableScanOperator, Table> topToTable = pGraphContext.getTopToTable();
 
+    HashMap<String, String> aliasToNewAliasMap = new HashMap<String, String>();
+
     // (partition to bucket file names) and (partition to bucket number) for
     // the big table;
     LinkedHashMap<Partition, List<String>> bigTblPartsToBucketFileNames =
@@ -241,6 +244,7 @@ abstract public class AbstractBucketJoin
             if (baseBigAlias.equals(alias)) {
               baseBigAlias = newAlias;
             }
+            aliasToNewAliasMap.put(alias, newAlias);
             alias = newAlias;
             break;
           }
@@ -353,6 +357,9 @@ abstract public class AbstractBucketJoin
     context.setJoinAliases(joinAliases);
     context.setBaseBigAlias(baseBigAlias);
     context.setBigTablePartitioned(bigTablePartitioned);
+    if (!aliasToNewAliasMap.isEmpty()) {
+      context.setAliasToNewAliasMap(aliasToNewAliasMap);
+    }
 
     return true;
   }
@@ -433,6 +440,18 @@ abstract public class AbstractBucketJoin
       desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
     }
 
+    Map<Integer, Set<String>> posToAliasMap = mapJoinOp.getPosToAliasMap();
+    Map<String, String> aliasToNewAliasMap = context.getAliasToNewAliasMap();
+    if (aliasToNewAliasMap != null && posToAliasMap != null) {
+      for (Map.Entry<String, String> entry: aliasToNewAliasMap.entrySet()) {
+        for (Set<String> aliases: posToAliasMap.values()) {
+          if (aliases.remove(entry.getKey())) {
+            aliases.add(entry.getValue());
+          }
+        }
+      }
+    }
+
     // successfully convert to bucket map join
     desc.setBucketMapJoin(true);
   }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketJoinProcCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketJoinProcCtx.java?rev=1645089&r1=1645088&r2=1645089&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketJoinProcCtx.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketJoinProcCtx.java Fri Dec 12 22:54:39 2014
@@ -40,6 +40,10 @@ public class BucketJoinProcCtx implement
   // The set of join operators which can be converted to a bucketed map join
   private Set<JoinOperator> convertedJoinOps = new HashSet<JoinOperator>();
 
+  // In checking if a mapjoin can be converted to bucket mapjoin,
+  // some join alias could be changed: alias -> newAlias
+  private transient Map<String, String> aliasToNewAliasMap;
+
   private Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition;
   private Map<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition;
   private Map<Partition, List<String>> bigTblPartsToBucketFileNames;
@@ -130,4 +134,12 @@ public class BucketJoinProcCtx implement
   public void setBigTablePartitioned(boolean bigTablePartitioned) {
     this.bigTablePartitioned = bigTablePartitioned;
   }
+
+  public void setAliasToNewAliasMap(Map<String, String> aliasToNewAliasMap) {
+    this.aliasToNewAliasMap = aliasToNewAliasMap;
+  }
+
+  public Map<String, String> getAliasToNewAliasMap() {
+    return aliasToNewAliasMap;
+  }
 }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1645089&r1=1645088&r2=1645089&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Fri Dec 12 22:54:39 2014
@@ -410,6 +410,7 @@ public class MapJoinProcessor implements
       childOp.replaceParent(op, mapJoinOp);
     }
 
+    mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
     mapJoinOp.setChildOperators(childOps);
     op.setChildOperators(null);
     op.setParentOperators(null);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java?rev=1645089&r1=1645088&r2=1645089&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java Fri Dec 12 22:54:39 2014
@@ -294,15 +294,12 @@ public class SparkMapJoinOptimizer imple
         }
       }
     }
+    mapJoinOp.setPosToAliasMap(posToAliasMap);
     BucketMapjoinProc.checkAndConvertBucketMapJoin(
       parseContext, mapJoinOp, joinTree, baseBigAlias, joinAliases);
-    int numBuckets = -1;
     MapJoinDesc joinDesc = mapJoinOp.getConf();
-    if (joinDesc.isBucketMapJoin()) {
-      numBuckets = joinDesc.getBigTableBucketNumMapping().size();
-      mapJoinOp.setPosToAliasMap(joinOp.getPosToAliasMap());
-    }
-    return numBuckets;
+    return joinDesc.isBucketMapJoin() ?
+      joinDesc.getBigTableBucketNumMapping().size() : -1;
   }
 
   /**



Mime
View raw message