trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dbirds...@apache.org
Subject [1/2] incubator-trafodion git commit: Fix problems with use of HBase row count estimate
Date Mon, 06 Jun 2016 16:02:45 GMT
Repository: incubator-trafodion
Updated Branches:
  refs/heads/master f243f8017 -> 0fe963dbf


Fix problems with use of HBase row count estimate

The CQD ESTIMATE_HBASE_ROW_COUNT was no longer used anywhere except in
Update Stats, which also uses USTAT_ESTIMATE_HBASE_ROW_COUNT. Both
had to be set before the row count was estimated, and the flawed
logic used in this decision sometimes resulted in the estimation being
performed twice. The former CQD has been removed, the latter is now set
to ON by default, and the decision logic has been cleaned up.
In addition, the estimation is now skipped when it is known that the
result will not be used (e.g., when sampling is not being used).


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/9a6be3e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/9a6be3e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/9a6be3e0

Branch: refs/heads/master
Commit: 9a6be3e04e1f9e463530cb954d49582ac9b1fab9
Parents: d199362
Author: Barry Fritchman <blfritch@edev08.esgyn.local>
Authored: Thu Jun 2 20:22:48 2016 +0000
Committer: Barry Fritchman <blfritch@edev08.esgyn.local>
Committed: Thu Jun 2 20:22:48 2016 +0000

----------------------------------------------------------------------
 core/sql/sqlcomp/DefaultConstants.h |  7 +---
 core/sql/sqlcomp/nadefaults.cpp     |  4 +-
 core/sql/ustat/hs_globals.cpp       | 64 ++++++++++++--------------------
 core/sql/ustat/hs_la.cpp            | 25 ++++++++-----
 core/sql/ustat/hs_la.h              | 23 +++++++-----
 5 files changed, 56 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9a6be3e0/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 5878bf7..b8b3609 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -681,7 +681,8 @@ enum DefaultConstants
   USTAT_ATTEMPT_ESP_PARALLELISM,  // use parallel plans for reading columns to form histograms
   USTAT_CHECK_HIST_ACCURACY,   // After stats collection, examine full table and calculate accuray of hists
   USTAT_CLUSTER_SAMPLE_BLOCKS, // number of blocks for cluster sampling
-  USTAT_ESTIMATE_HBASE_ROW_COUNT,  // If ON, estimate row count of HBase table instead of count(*)
+  USTAT_ESTIMATE_HBASE_ROW_COUNT,  // If ON, estimate row count of HBase table instead of count(*), subject
+                                   //     to USTAT_MIN_ESTIMATE_FOR_ROWCOUNT setting)
   USTAT_FORCE_TEMP,            // Force temporary table to be used
   USTAT_HBASE_SAMPLE_RETURN_INTERVAL, // When sampling in HBase, adjust sampling rate to return once
                                       //   on average once per this many rows
@@ -3451,10 +3452,6 @@ enum DefaultConstants
   // if ON, hbase coprocessors could be used, if the query allows it.
   HBASE_COPROCESSORS,
 
-  // If ON, optimizer will estimate HBase row count by looking at info in HFiles
-  // instead of relying on default cardinality estimate.
-  ESTIMATE_HBASE_ROW_COUNT,
-
   // if OFF or '0' is disabled, ON or '1' is simple pushdown, '2' is for advance pushdown
   // It will depends on the query on which predicates or sub-predicates could be pushed.
   HBASE_FILTER_PREDS,

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9a6be3e0/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index 6221024..713c2da 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1307,8 +1307,6 @@ SDDui___(CYCLIC_ESP_PLACEMENT,                  "1"),
   DDSint__(ESP_PRIORITY,                        "0"),
   DDSint__(ESP_PRIORITY_DELTA,                  "0"),
 
-  DDkwd__(ESTIMATE_HBASE_ROW_COUNT,             "ON"),
-
  // Disable hints - if SYSTEM, enable on SSD, and disable only on HDD
   DDkwd__(EXE_BMO_DISABLE_CMP_HINTS_OVERFLOW_HASH,	"SYSTEM"),
   DDkwd__(EXE_BMO_DISABLE_CMP_HINTS_OVERFLOW_SORT,	"SYSTEM"),
@@ -3515,7 +3513,7 @@ XDDkwd__(SUBQUERY_UNNESTING,			"ON"),
   DDkwd__(USTAT_DEBUG_FORCE_FETCHCOUNT,         "OFF"),
   DD_____(USTAT_DEBUG_TEST,                     ""),
   DDflte_(USTAT_DSHMAX,		                "50.0"),
-  DDkwd__(USTAT_ESTIMATE_HBASE_ROW_COUNT,       "OFF"),
+  DDkwd__(USTAT_ESTIMATE_HBASE_ROW_COUNT,       "ON"),
   DDkwd__(USTAT_FETCHCOUNT_ACTIVE,              "OFF"),
   DDkwd__(USTAT_FORCE_MOM_ESTIMATOR,            "OFF"),
   DDkwd__(USTAT_FORCE_TEMP,                     "OFF"),

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9a6be3e0/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index 50c15d5..5e52d6e 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -3023,11 +3023,12 @@ Lng32 HSGlobalsClass::Initialize()
     actualRowCount = objDef->getRowCount(currentRowCountIsEstimate_,
                                          inserts, deletes, updates,
                                          numPartitions,
-                                         minRowCtPerPartition_);
+                                         minRowCtPerPartition_,
+                                         optFlags & SAMPLE_REQUESTED);
     LM->StopTimer();
     if (LM->LogNeeded())
       {
-        sprintf(LM->msg, "\tcurrentRowCountIsEstimate_=%d from GetRowCount()", currentRowCountIsEstimate_);
+        sprintf(LM->msg, "\tcurrentRowCountIsEstimate_=%d from getRowCount()", currentRowCountIsEstimate_);
         LM->Log(LM->msg);
       }
 
@@ -3051,47 +3052,28 @@ Lng32 HSGlobalsClass::Initialize()
                 LM->Log(LM->msg);
               }
           }
-        else if (!((optFlags & SAMPLE_REQUESTED) &&
-                   convertInt64ToDouble(actualRowCount) >=
-                     CmpCommon::getDefaultNumeric(USTAT_MIN_ESTIMATE_FOR_ROWCOUNT)))
+        else if (convertInt64ToDouble(actualRowCount) <   // may be 0 (no estimate) or -1 (error doing estimation)
+                     CmpCommon::getDefaultNumeric(USTAT_MIN_ESTIMATE_FOR_ROWCOUNT))
           {
-            actualRowCount = 0;
-            if (hs_globals->isHbaseTable &&
-                CmpCommon::getDefault(USTAT_ESTIMATE_HBASE_ROW_COUNT) == DF_ON)
-              {
-                LM->StartTimer("Estimate row count for HBase table");
-                actualRowCount = objDef->getNATable()->estimateHBaseRowCount();
-                LM->StopTimer();
-                if (LM->LogNeeded())
-                  {
-                    snprintf(LM->msg, sizeof(LM->msg),
-                             "Call to estimateHBaseRowCount() returned " PF64 ".",
-                             actualRowCount);
-                    LM->Log(LM->msg);
-                  }
-              }
-
-            // If actualRowCount is still 0 then the table is not an HBase table
-            // (or the cqd is not set). If it is HIST_NO_STATS_ROWCOUNT, then
-            // estimateHBaseRowCount() was not able to produce an estimate. In either
-            // of these cases, we need to resort to a count(*).
-            if (actualRowCount == 0 ||
-                actualRowCount == ActiveSchemaDB()->getDefaults().getAsDouble(HIST_NO_STATS_ROWCOUNT))
+            if (LM->LogNeeded() && actualRowCount > 0)
+            {
+              sprintf(LM->msg, "Estimated row count " PF64 " rejected (below size threshhold).",
+                      actualRowCount);
+              LM->Log(LM->msg);
+            }
+            LM->StartTimer("Execute query to get row count");
+            query  = "SELECT COUNT(*) FROM ";
+            query += getTableName(user_table->data(), nameSpace);
+            query += " FOR READ UNCOMMITTED ACCESS";
+            retcode = cursor.fetchNumColumn(query, NULL, &actualRowCount);
+            LM->StopTimer();
+            HSHandleError(retcode);
+            currentRowCountIsEstimate_ = FALSE;
+            if (LM->LogNeeded())
               {
-                LM->StartTimer("Execute query to get row count");
-                query  = "SELECT COUNT(*) FROM ";
-                query += getTableName(user_table->data(), nameSpace);
-                query += " FOR READ UNCOMMITTED ACCESS";
-                retcode = cursor.fetchNumColumn(query, NULL, &actualRowCount);
-                LM->StopTimer();
-                HSHandleError(retcode);
-                currentRowCountIsEstimate_ = FALSE;
-                if (LM->LogNeeded())
-                  {
-                    convertInt64ToAscii(actualRowCount, intStr);
-                    sprintf(LM->msg, "\n\t\tUsing select count(*): rows=%s", intStr);
-                    LM->Log(LM->msg);
-                  }
+                convertInt64ToAscii(actualRowCount, intStr);
+                sprintf(LM->msg, "\n\t\tUsing select count(*): rows=%s", intStr);
+                LM->Log(LM->msg);
               }
           }
       }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9a6be3e0/core/sql/ustat/hs_la.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_la.cpp b/core/sql/ustat/hs_la.cpp
index 61c66dc..ab9a2bc 100644
--- a/core/sql/ustat/hs_la.cpp
+++ b/core/sql/ustat/hs_la.cpp
@@ -444,10 +444,11 @@ void HSSqTableDef::resetRowCounts()
   }
 #endif
 
-Int64 HSSqTableDef::getRowCount(NABoolean &isEstimate)
+Int64 HSSqTableDef::getRowCount(NABoolean &isEstimate,
+                                NABoolean estimateIfNecessary)
   {
     Int64 bogus;
-    return getRowCount(isEstimate, bogus, bogus, bogus, bogus, bogus);
+    return getRowCount(isEstimate, bogus, bogus, bogus, bogus, bogus, estimateIfNecessary);
   }
 
 /***************************************************************************/
@@ -468,6 +469,7 @@ Int64 HSSqTableDef::getRowCount(NABoolean &isEstimate)
 /*              the table since the last update stats using NECESSARY.     */
 /*            numUpdates: an output value, set to the number of updates on */
 /*              the table since the last update stats using NECESSARY.     */
+/*            estimateIfNecessary: not used in this redefinition.          */
 /* RETURN VALUE: The number of rows in the table, -1 if there is an error  */
 /*               reading a partition.                                      */
 /***************************************************************************/
@@ -476,7 +478,8 @@ Int64 HSSqTableDef::getRowCount(NABoolean &isEstimate,
                               Int64 &numDeletes,
                               Int64 &numUpdates,
                               Int64 &numPartitions,
-                              Int64 &minRowCtPerPartition)
+                              Int64 &minRowCtPerPartition,
+                              NABoolean estimateIfNecessary)
   {
     isEstimate = TRUE;
     numInserts =
@@ -956,7 +959,8 @@ Int64 HSHiveTableDef::getRowCount(NABoolean &isEstimate,
                                   Int64 &numDeletes,
                                   Int64 &numUpdates,
                                   Int64 &numPartitions,
-                                  Int64 &minRowCtPerPartition)
+                                  Int64 &minRowCtPerPartition,
+                                  NABoolean estimateIfNecessary)
 {
   if (minPartitionRows_ == -1)
     {
@@ -973,7 +977,7 @@ Int64 HSHiveTableDef::getRowCount(NABoolean &isEstimate,
   numPartitions = getNumPartitions();
   minRowCtPerPartition = minPartitionRows_;
 
-  return getRowCount(isEstimate);
+  return getRowCount(isEstimate, estimateIfNecessary);
 }
 
 Lng32 HSHiveTableDef::DescribeColumnNames()
@@ -1154,10 +1158,12 @@ Lng32 HSHbaseTableDef::getNumPartitions() const
   return getNATable()->getClusteringIndex()->getCountOfPartitions();
 }
 
-Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate)
+Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary)
 {
   isEstimate = TRUE;
-  if (!naTbl_->isSeabaseMDTable() && CmpCommon::getDefault(ESTIMATE_HBASE_ROW_COUNT) == DF_ON)
+  if (estimateIfNecessary &&
+      !naTbl_->isSeabaseMDTable() &&
+      CmpCommon::getDefault(USTAT_ESTIMATE_HBASE_ROW_COUNT) == DF_ON)
     return naTbl_->estimateHBaseRowCount();
   else
     return 0;
@@ -1168,7 +1174,8 @@ Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate,
                                   Int64 &numDeletes,
                                   Int64 &numUpdates,
                                   Int64 &numPartitions,
-                                  Int64 &minRowCtPerPartition)
+                                  Int64 &minRowCtPerPartition,
+                                  NABoolean estimateIfNecessary)
 {
   // Comparable code for Hive tables:
   //if (minPartitionRows_ == -1)
@@ -1185,7 +1192,7 @@ Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate,
   //numPartitions = getNumPartitions();
   //minRowCtPerPartition = minPartitionRows_;
 
-  return getRowCount(isEstimate);
+  return getRowCount(isEstimate, estimateIfNecessary);
 }
 
 Lng32 HSHbaseTableDef::DescribeColumnNames()

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/9a6be3e0/core/sql/ustat/hs_la.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_la.h b/core/sql/ustat/hs_la.h
index bd70e53..3344feb 100644
--- a/core/sql/ustat/hs_la.h
+++ b/core/sql/ustat/hs_la.h
@@ -91,13 +91,15 @@ class HSTableDef : public NABasicObject
 
     virtual void getRowChangeCounts(Int64 &inserts, Int64 &deletes, Int64 &updates) = 0;
     virtual void resetRowCounts() = 0;
-    virtual Int64 getRowCount(NABoolean &isEstimate) = 0;
+    virtual Int64 getRowCount(NABoolean &isEstimate,
+                              NABoolean estimateIfNecessary = TRUE) = 0;
     virtual Int64 getRowCount(NABoolean &isEstimate,
                       Int64 &numInserts,
                       Int64 &numDeletes,
                       Int64 &numUpdates,
                       Int64 &numPartitions,
-                      Int64 &minRowCtPerPartition
+                      Int64 &minRowCtPerPartition,
+                      NABoolean estimateIfNecessary
                      ) = 0;
     Int64 getRowCountUsingSelect();
     ComDiskFileFormat getObjectFormat() const {return objActualFormat_;}
@@ -184,13 +186,14 @@ class HSSqTableDef : public HSTableDef
 
     void getRowChangeCounts(Int64 &inserts, Int64 &deletes, Int64 &updates);
     void resetRowCounts();
-    Int64 getRowCount(NABoolean &isEstimate);
+    Int64 getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary = TRUE);
     Int64 getRowCount(NABoolean &isEstimate,
                       Int64 &numInserts,
                       Int64 &numDeletes,
                       Int64 &numUpdates,
                       Int64 &numPartitions,
-                      Int64 &minRowCtPerPartition
+                      Int64 &minRowCtPerPartition,
+                      NABoolean estimateIfNecessary
                      );
     Lng32 collectFileStatistics() const;
     NABoolean isInMemoryObjectDefn() const {return inMemoryObjectDefn_;}
@@ -278,17 +281,18 @@ class HSHiveTableDef : public HSTableDef
       }
     void resetRowCounts()
       {}
-    Int64 getRowCount(NABoolean &isEstimate)
+    Int64 getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary = TRUE)
       {
         isEstimate = TRUE;
-        return tableStats_->getEstimatedRowCount();
+        return (estimateIfNecessary ? tableStats_->getEstimatedRowCount() : 0);
       }
     Int64 getRowCount(NABoolean &isEstimate,
                       Int64 &numInserts,
                       Int64 &numDeletes,
                       Int64 &numUpdates,
                       Int64 &numPartitions,
-                      Int64 &minRowCtPerPartition);
+                      Int64 &minRowCtPerPartition,
+                      NABoolean estimateIfNecessary);
     Lng32 collectFileStatistics() const
       {
         return 0;
@@ -389,13 +393,14 @@ class HSHbaseTableDef : public HSTableDef
       }
     void resetRowCounts()
       {}
-    Int64 getRowCount(NABoolean &isEstimate);
+    Int64 getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary = TRUE);
     Int64 getRowCount(NABoolean &isEstimate,
                       Int64 &numInserts,
                       Int64 &numDeletes,
                       Int64 &numUpdates,
                       Int64 &numPartitions,
-                      Int64 &minRowCtPerPartition);
+                      Int64 &minRowCtPerPartition,
+                      NABoolean estimateIfNecessary);
     Lng32 collectFileStatistics() const
       {
         return 0;


Mime
View raw message