hive-commits mailing list archives

From harisan...@apache.org
Subject hive git commit: HIVE-7723 : Explain plan for complex query with lots of partitions is slow due to in-efficient collection used to find a matching ReadEntity (Hari Subramaniyan, reviewed by Ashutosh Chauhan)
Date Sat, 31 Oct 2015 05:52:57 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 9303b2c45 -> ed482dd0c


HIVE-7723 : Explain plan for complex query with lots of partitions is slow due to in-efficient collection used to find a matching ReadEntity (Hari Subramaniyan, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ed482dd0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ed482dd0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ed482dd0

Branch: refs/heads/branch-1
Commit: ed482dd0cea5d33a11ef99fb6ec4495667054964
Parents: 9303b2c
Author: Hari Subramaniyan <harisankar@apache.org>
Authored: Fri Oct 30 22:51:26 2015 -0700
Committer: Hari Subramaniyan <harisankar@apache.org>
Committed: Fri Oct 30 22:52:45 2015 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/Driver.java  |  3 +-
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |  3 +-
 .../hive/ql/index/HiveIndexQueryContext.java    | 10 +++---
 .../hive/ql/optimizer/GenMapRedUtils.java       |  9 ++---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  1 +
 .../apache/hadoop/hive/ql/plan/PlanUtils.java   | 38 ++++++++++++++++++++
 6 files changed, 51 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
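
A note on the core of this change: a java.util.Set can answer contains() for a ReadEntity, but it cannot hand back the instance it already stores, so merging parent/direct-read information previously meant scanning the whole input collection for every partition. The new PlanUtils.addPartitionInputs (last hunk below) sidesteps that by indexing the existing inputs in a LinkedHashMap keyed by the entity itself, giving one O(1) lookup per partition. The sketch below shows the same map-keyed-by-element pattern with plain JDK types; the Entity class and its fields are invented for illustration and are not Hive classes.

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.LinkedHashSet;
    import java.util.Map;
    import java.util.Set;

    public class MergeByKeySketch {
      // Stand-in for ReadEntity: equality is decided by the name alone, while
      // "parents" is extra state we want to merge into the copy already stored.
      static final class Entity {
        final String name;
        final Set<String> parents = new LinkedHashSet<String>();

        Entity(String name, String... parents) {
          this.name = name;
          this.parents.addAll(Arrays.asList(parents));
        }

        @Override public boolean equals(Object o) {
          return o instanceof Entity && ((Entity) o).name.equals(name);
        }
        @Override public int hashCode() { return name.hashCode(); }
        @Override public String toString() { return name + parents; }
      }

      public static void main(String[] args) {
        Set<Entity> inputs = new LinkedHashSet<Entity>();
        inputs.add(new Entity("tbl@part=1", "view_a"));

        // A Set only answers contains(); to merge into the stored element we
        // index the same objects in a map keyed by themselves.
        Map<Entity, Entity> byKey = new LinkedHashMap<Entity, Entity>();
        for (Entity e : inputs) {
          byKey.put(e, e);
        }

        Entity incoming = new Entity("tbl@part=1", "view_b");
        Entity existing = byKey.get(incoming);        // O(1), returns the stored copy
        if (existing != null) {
          existing.parents.addAll(incoming.parents);  // merge instead of re-scanning the set
        } else {
          byKey.put(incoming, incoming);
        }

        System.out.println(inputs);                   // [tbl@part=1[view_a, view_b]]
      }
    }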


http://git-wip-us.apache.org/repos/asf/hive/blob/ed482dd0/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index d1db215..9b6104e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -1583,7 +1584,7 @@ public class Driver implements CommandProcessor {
       // remove incomplete outputs.
       // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions.
       // remove them
-      HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
+      HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>();
       for (WriteEntity output : plan.getOutputs()) {
         if (!output.isComplete()) {
           remOutputs.add(output);
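
Both this hunk and the MoveTask.java hunk below replace HashSet with LinkedHashSet for the plan outputs. The two offer the same constant-time add()/contains(), but LinkedHashSet also iterates in insertion order, so the entities come back in a stable, deterministic order when the plan is printed or walked. A minimal sketch of the difference, with arbitrary stand-in strings:

    import java.util.HashSet;
    import java.util.LinkedHashSet;
    import java.util.Set;

    public class OrderSketch {
      public static void main(String[] args) {
        Set<String> hashed = new HashSet<String>();
        Set<String> linked = new LinkedHashSet<String>();
        for (String s : new String[] {"part=09", "part=01", "part=05"}) {
          hashed.add(s);
          linked.add(s);
        }
        // HashSet iteration order depends on hash codes and capacity, not on
        // the order the elements were added.
        System.out.println(hashed);
        // LinkedHashSet always iterates in insertion order: [part=09, part=01, part=05]
        System.out.println(linked);
      }
    }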

http://git-wip-us.apache.org/repos/asf/hive/blob/ed482dd0/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 8eb6c97..9e9d99e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -69,6 +69,7 @@ import java.security.AccessControlException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -423,7 +424,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
               // For DP, WriteEntity creation is deferred at this stage so we need to update
               // queryPlan here.
               if (queryPlan.getOutputs() == null) {
-                queryPlan.setOutputs(new HashSet<WriteEntity>());
+                queryPlan.setOutputs(new LinkedHashSet<WriteEntity>());
               }
               queryPlan.getOutputs().add(enty);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ed482dd0/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
index 617723e..06e7547 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
@@ -18,7 +18,7 @@
 package org.apache.hadoop.hive.ql.index;
 
 import java.io.Serializable;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  */
 public class HiveIndexQueryContext {
 
-  private HashSet<ReadEntity> additionalSemanticInputs; // additional inputs to add to the parse context when
+  private Set<ReadEntity> additionalSemanticInputs; // additional inputs to add to the parse context when
                                                         // merging the index query tasks
   private String indexInputFormat;        // input format to set on the TableScanOperator to activate indexing
   private String indexIntermediateFile;   // name of intermediate file written by the index query for the
@@ -52,12 +52,12 @@ public class HiveIndexQueryContext {
     this.queryTasks = null;
   }
 
-  public HashSet<ReadEntity> getAdditionalSemanticInputs() {
+  public Set<ReadEntity> getAdditionalSemanticInputs() {
     return additionalSemanticInputs;
   }
-  public void addAdditionalSemanticInputs(HashSet<ReadEntity> additionalParseInputs) {
+  public void addAdditionalSemanticInputs(Set<ReadEntity> additionalParseInputs) {
     if (this.additionalSemanticInputs == null) {
-      this.additionalSemanticInputs = new HashSet<ReadEntity>();
+      this.additionalSemanticInputs = new LinkedHashSet<ReadEntity>();
     }
     this.additionalSemanticInputs.addAll(additionalParseInputs);
   }
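
Besides switching the backing collection, this hunk widens the getter and the addAdditionalSemanticInputs parameter from HashSet to the Set interface, so callers are no longer tied to one concrete implementation while the class keeps choosing LinkedHashSet internally. A small sketch of that pattern; the Holder class and its method names are hypothetical, not Hive code:

    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.Set;
    import java.util.TreeSet;

    public class InterfaceSketch {
      // Hypothetical holder mirroring the shape of HiveIndexQueryContext.
      static class Holder {
        private Set<String> items;                 // field declared against the interface

        public Set<String> getItems() { return items; }

        public void addItems(Set<String> more) {   // accepts any Set implementation
          if (items == null) {
            items = new LinkedHashSet<String>();   // the concrete type stays an internal choice
          }
          items.addAll(more);
        }
      }

      public static void main(String[] args) {
        Holder h = new Holder();
        h.addItems(new TreeSet<String>(Arrays.asList("b", "a")));   // callers may pass a TreeSet...
        h.addItems(new LinkedHashSet<String>(Arrays.asList("c")));  // ...or any other Set
        System.out.println(h.getItems());          // [a, b, c]
      }
    }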

http://git-wip-us.apache.org/repos/asf/hive/blob/ed482dd0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 693d8c7..60987b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -576,13 +576,9 @@ public final class GenMapRedUtils {
     TableDesc tblDesc = null;
     boolean initTableDesc = false;
 
-    for (Partition part : parts) {
-      if (part.getTable().isPartitioned()) {
-        PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo, isDirectRead));
-      } else {
-        PlanUtils.addInput(inputs, new ReadEntity(part.getTable(), parentViewInfo, isDirectRead));
-      }
+    PlanUtils.addPartitionInputs(parts, inputs, parentViewInfo, isDirectRead);
 
+    for (Partition part: parts) {
       // Later the properties have to come from the partition as opposed
       // to from the table in order to support versioning.
       Path[] paths = null;
@@ -692,6 +688,7 @@ public final class GenMapRedUtils {
         }
       }
     }
+
     if (emptyInput) {
       parseCtx.getGlobalLimitCtx().disableOpt();
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/ed482dd0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 6e82f41..5d2a9bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9623,6 +9623,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         if (partitions != null) {
           for (Partition partn : partitions) {
             // inputs.add(new ReadEntity(partn)); // is this needed at all?
+	      LOG.info("XXX: adding part: "+partn);
             outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
           }
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/ed482dd0/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 76926e7..e72742c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -52,6 +53,7 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
@@ -955,6 +957,42 @@ public final class PlanUtils {
       throw new RuntimeException(e);
     }
   }
+  
+  public static void addPartitionInputs(Collection<Partition> parts, Collection<ReadEntity> inputs,
+      ReadEntity parentViewInfo, boolean isDirectRead) {
+    // Store the inputs in a HashMap since we can't get a ReadEntity from inputs since it is
+    // implemented as a set.ReadEntity is used as the key so that the HashMap has the same behavior
+    // of equals and hashCode
+    Map<ReadEntity, ReadEntity> readEntityMap =
+        new LinkedHashMap<ReadEntity, ReadEntity>(inputs.size());
+    for (ReadEntity input : inputs) {
+      readEntityMap.put(input, input);
+    }
+
+    for (Partition part : parts) {
+      ReadEntity newInput = null;
+      if (part.getTable().isPartitioned()) {
+        newInput = new ReadEntity(part, parentViewInfo, isDirectRead);
+      } else {
+        newInput = new ReadEntity(part.getTable(), parentViewInfo, isDirectRead);
+      }
+
+      if (readEntityMap.containsKey(newInput)) {
+        ReadEntity input = readEntityMap.get(newInput);
+        if ((newInput.getParents() != null) && (!newInput.getParents().isEmpty())) {
+          input.getParents().addAll(newInput.getParents());
+          input.setDirect(input.isDirect() || newInput.isDirect());
+        }
+      } else {
+        readEntityMap.put(newInput, newInput);
+      }
+    }
+
+    // Add the new ReadEntity that were added to readEntityMap in PlanUtils.addInput
+    if (inputs.size() != readEntityMap.size()) {
+      inputs.addAll(readEntityMap.keySet());
+    }
+  }
 
   public static void addInputsForView(ParseContext parseCtx) throws HiveException {
     Set<ReadEntity> inputs = parseCtx.getSemanticInputs();
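
One detail of addPartitionInputs worth spelling out: entities already present in inputs are mutated in place through the map, so only genuinely new keys ever need to be copied back, and the final addAll is safe because a Set silently skips elements that compare equal to ones it already holds. A short demonstration with JDK types; the string keys merely stand in for ReadEntity:

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.LinkedHashSet;
    import java.util.Map;
    import java.util.Set;

    public class AddAllSketch {
      public static void main(String[] args) {
        Set<String> inputs =
            new LinkedHashSet<String>(Arrays.asList("db.t@part=1", "db.t@part=2"));

        // Index the existing elements, then register one duplicate and one new key.
        Map<String, String> byKey = new LinkedHashMap<String, String>();
        for (String s : inputs) {
          byKey.put(s, s);
        }
        byKey.put("db.t@part=2", "db.t@part=2");  // already present: map does not grow
        byKey.put("db.t@part=3", "db.t@part=3");  // genuinely new

        // Copy back only when something new appeared; addAll skips the keys the
        // set already contains, so nothing is duplicated or reordered.
        if (inputs.size() != byKey.size()) {
          inputs.addAll(byKey.keySet());
        }
        System.out.println(inputs);               // [db.t@part=1, db.t@part=2, db.t@part=3]
      }
    }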

