hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-14018: Make IN clause row selectivity estimation customizable (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Fri, 17 Jun 2016 11:15:14 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 18a748603 -> cfa9cf585


HIVE-14018: Make IN clause row selectivity estimation customizable (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cfa9cf58
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cfa9cf58
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cfa9cf58

Branch: refs/heads/branch-2.1
Commit: cfa9cf5856d2dd1788fb6bd3ad45bbc8fa9e8adc
Parents: 18a7486
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Fri Jun 17 12:14:59 2016 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Jun 17 12:14:59 2016 +0100

----------------------------------------------------------------------
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java       | 5 +++++
 .../ql/optimizer/stats/annotation/StatsRulesProcFactory.java    | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cfa9cf58/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index c60a193..a6853b5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1624,6 +1624,11 @@ public class HiveConf extends Configuration {
         "of rows and data size. Since files in tables/partitions are serialized (and optionally\n" +
         "compressed) the estimates of number of rows and data size cannot be reliably determined.\n" +
         "This factor is multiplied with the file size to account for serialization and compression."),
+    HIVE_STATS_IN_CLAUSE_FACTOR("hive.stats.filter.in.factor", (float) 1.0,
+        "Currently column distribution is assumed to be uniform. This can lead to overestimation/underestimation\n" +
+        "in the number of rows filtered by a certain operator, which in turn might lead to overprovision or\n" +
+        "underprovision of resources. This factor is applied to the cardinality estimation of IN clauses in\n" +
+        "filter operators."),
 
     // Concurrency
     HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false,

http://git-wip-us.apache.org/repos/asf/hive/blob/cfa9cf58/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 3f82594..5625091 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -473,7 +473,8 @@ public class StatsRulesProcFactory {
         float columnFactor = dvs == 0 ? 0.5f : ((float)dvs / numRows) * values.get(i).size();
         factor *= columnFactor;
       }
-      return Math.round( (double)numRows * factor);
+      float inFactor = HiveConf.getFloatVar(aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_IN_CLAUSE_FACTOR);
+      return Math.round( (double)numRows * factor * inFactor);
     }
 
     private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,


Mime
View raw message