trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dbirds...@apache.org
Subject [1/4] incubator-trafodion git commit: fix JIRA1468 (missing MDAM plan)
Date Tue, 01 Sep 2015 23:43:07 GMT
Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 40c27869d -> 191c82d3b


fix JIRA1468 (missing MDAM plan)


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/00c46394
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/00c46394
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/00c46394

Branch: refs/heads/master
Commit: 00c463940cac3750efbb38bd96348ad0abee3bd5
Parents: 5928f31
Author: Qifan Chen <qfc@dev02.trafodion.org>
Authored: Mon Aug 31 18:01:40 2015 +0000
Committer: Qifan Chen <qfc@dev02.trafodion.org>
Committed: Mon Aug 31 18:01:40 2015 +0000

----------------------------------------------------------------------
 core/sql/optimizer/Cost.cpp          |  19 +++++
 core/sql/optimizer/Cost.h            |   2 +-
 core/sql/optimizer/ScanOptimizer.cpp | 137 ++++++++++++++++--------------
 core/sql/optimizer/disjuncts.h       |   8 ++
 core/sql/optimizer/mdam.cpp          |  68 +++++++++++++++
 5 files changed, 171 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/00c46394/core/sql/optimizer/Cost.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/Cost.cpp b/core/sql/optimizer/Cost.cpp
index 7070f01..dc6d701 100644
--- a/core/sql/optimizer/Cost.cpp
+++ b/core/sql/optimizer/Cost.cpp
@@ -897,10 +897,23 @@ void SimpleCostVector::print(FILE* ofd) const
 // excluded for coverage because it's a debug code
 void SimpleCostVector::print(FILE* pfp) const
 {
+  fprintf(pfp,"CPUTime=%g\n",counter_[CPU_TIME].value());
+  fprintf(pfp,"IOTime=%g\n",counter_[IO_TIME].value());
+  fprintf(pfp,"MSGTime=%g\n",counter_[MSG_TIME].value());
+  fprintf(pfp,"idleTime=%g\n",counter_[IDLE_TIME].value());
+  fprintf(pfp,"tuple processed=%g\n",counter_[TC_PROC].value());
+  fprintf(pfp,"tuple produced=%g\n",counter_[TC_PROD].value());
+  fprintf(pfp,"tuple sent=%g\n",counter_[TC_SENT].value());
+  fprintf(pfp,"IO rand=%g\n",counter_[IO_RAND].value());
+  fprintf(pfp,"IO seq=%g\n",counter_[IO_SEQ].value());
+  fprintf(pfp,"num Probes=%g\n",counter_[NUM_PROBES].value());
+/*
   for (Lng32 i = 0; i < COUNT_OF_SIMPLE_COST_COUNTERS; i++)
     fprintf(pfp,"%g,",counter_[i].value());
+*/
   fprintf(pfp,"\n");
 }
+
 // LCOV_EXCL_STOP
 //<pb>
 
@@ -2449,6 +2462,12 @@ void Cost::print(FILE * pfp, const char * , const char *) const
   fprintf(pfp,"preference : %d\n", priority_.getLevel()); //sathya
   return;
 }
+
+void Cost::display() const                                        
+{ 
+  print(); 
+}
+
 // LCOV_EXCL_STOP
 
 //<pb>

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/00c46394/core/sql/optimizer/Cost.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/Cost.h b/core/sql/optimizer/Cost.h
index 3fdcb18..26600ae 100644
--- a/core/sql/optimizer/Cost.h
+++ b/core/sql/optimizer/Cost.h
@@ -535,7 +535,7 @@ public:
   void print(FILE * f = stdout,
 	     const char * prefix = "", const char * suffix = "") const;
 
-  void display() const                                        { print(); }
+  void display() const;
 //<pb>
 private:
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/00c46394/core/sql/optimizer/ScanOptimizer.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ScanOptimizer.cpp b/core/sql/optimizer/ScanOptimizer.cpp
index ef4d37a..556b81e 100644
--- a/core/sql/optimizer/ScanOptimizer.cpp
+++ b/core/sql/optimizer/ScanOptimizer.cpp
@@ -2369,12 +2369,27 @@ IndexDescHistograms::estimateUecUsingMultiColUec(
    // Instead of returning 55mil UEC, the above formula MC(A,B)/MC(A) returned 221 
    // (55727500/252136 -> 221). The new code below is similar to how we compute MCUEC in the 
    // rest of the compiler code.
+
    for (CollIndex j=0; j<listOfSubsets->entries();j++)
    {
      vidSet = (*listOfSubsets)[j];
+#ifndef NDEBUG
+     if(getenv("MDAM_MCUEC"))
+     {
+       fprintf(stdout, "Now checkout this set\n");
+       vidSet.print();
+     }
+#endif
      if (vidSet.entries() == columnList.entries())
      {
        estimatedUec = MCUL->lookup(vidSet);
+#ifndef NDEBUG
+     if(getenv("MDAM_MCUEC"))
+     {
+       fprintf(stdout, "entry size matches\n");
+       fprintf(stdout, "MC UEC=%f\n", estimatedUec.value());
+     }
+#endif
        return TRUE;
      }
    }
@@ -2549,49 +2564,6 @@ ScanOptimizer::~ScanOptimizer()
 {
 };
 
-static NABoolean containsINListOrRanges(const Disjuncts *curDisjuncts ) 
-{
-//  CollIndex order;
-//  CollIndex numOfKeyCols=keyPredsByCol.entries();
-//  KeyColumns::KeyColumn::KeyColumnType typeOfRange = KeyColumns::KeyColumn::EMPTY;
-  Disjunct disjunct;
-  ValueIdSet vs;
-  for (CollIndex i=0; i < curDisjuncts->entries(); i++)
-  {
-    curDisjuncts->get(disjunct,i);
-    vs = disjunct.getAsValueIdSet();
-    for (ValueId predId = vs.init();
-	 vs.next(predId);
-	 vs.advance(predId) )
-    {
-      if(predId.getItemExpr()->getOperatorType() == ITM_RANGE_SPEC_FUNC )
-      {
-	if(predId.getItemExpr()->child(1)->getOperatorType() == ITM_OR ||
-	   predId.getItemExpr()->child(1)->getOperatorType() == ITM_AND)
-	  return TRUE;
-      }
-    }
-  }
-  /*
-  if (keyPredsByCol.containsPredicates())
-  {
-    for (order = 0; order < numOfKeyCols; order++)
-    {
-      if (keyPredsByCol.getPredicateExpressionPtr(order) != NULL)
-        typeOfRange = keyPredsByCol.getPredicateExpressionPtr(order)->getType();
-      else
-      	typeOfRange = KeyColumns::KeyColumn::EMPTY;
-      if (typeOfRange == KeyColumns::KeyColumn::INLIST )
-	//  ||
-	//  typeOfRange != KeyColumns::KeyColumn::RANGE)
-	return TRUE;
-    } // end of for-loop
-
-  } // if (containsPredicates())
-  */
-  return FALSE;
-}
-
 // Determine if the SimpleFileScanOptimizer can be used for the given
 // scan with the given context.
 // Return : TRUE - Use the SimpleFileScanOptimizer
@@ -2605,6 +2577,7 @@ ScanOptimizer::useSimpleFileScanOptimizer(const FileScan& associatedFileScan
 #ifndef NDEBUG
   if (CmpCommon::getDefault(MDAM_TRACING) == DF_ON )
     MdamTrace::setLevel(MTL2);
+    //MdamTrace::setLevel(MDAM_TRACE_LEVEL_ALL);
 #endif
 
   //
@@ -2790,7 +2763,7 @@ ScanOptimizer::useSimpleFileScanOptimizer(const FileScan& associatedFileScan
       if (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON )
       {
 	if ((singleSubsetPrefixColumn+1 < keyPredsByCol.entries()) &&
-	    (curDisjuncts->entries() >= 1) && containsINListOrRanges(curDisjuncts))
+	    (curDisjuncts->entries() >= 1) && curDisjuncts->containsAndorOrPredsInRanges())
 	{
 	  return FALSE;
 	}
@@ -2811,7 +2784,7 @@ ScanOptimizer::useSimpleFileScanOptimizer(const FileScan& associatedFileScan
     if (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON )
     {
       if ((searchKey.getKeyPredicates().entries() == 0) &&
-	  (curDisjuncts->entries() >= 1) && containsINListOrRanges(curDisjuncts))
+	  (curDisjuncts->entries() >= 1) && curDisjuncts->containsAndorOrPredsInRanges())
       {
 	return FALSE;
       }
@@ -3487,13 +3460,22 @@ ScanOptimizer::computeCostObject(
   // occuring. If there is synchronous access, then we need to
   // adjust the cost, since the cost was computed assuming that
   // asynchronous access would occur.
-  if (lppf == NULL)  // Unpartitioned table?
+  if (lppf == NULL)  // Unpartitioned, Trafodion, native HBase or hive tables?
     {
-      // If there is no lppf then the table cannot be partitioned
-      DCMPASSERT(getContext().getPlan()->getPhysicalProperty()->
-                getPartitioningFunction()->
-                 castToSinglePartitionPartitioningFunction() != NULL);
-      countOfPAs = 1;
+      PartitioningFunction* partFunc = getContext().getPlan()
+                ->getPhysicalProperty()-> getPartitioningFunction();
+
+      if ( partFunc->castToSinglePartitionPartitioningFunction() )
+         countOfPAs = 1;
+      else {
+         const FileScan& fileScan = getFileScan();
+
+         if ( fileScan.isHbaseTable() || fileScan.isHiveTable() ||
+              fileScan.isSeabaseTable())
+            countOfPAs = partFunc->getCountOfPartitions();
+         else
+            DCMPASSERT(FALSE);
+      }
     }
   else
     {
@@ -3650,7 +3632,6 @@ ScanOptimizer::computeCostObject(
 
   return costPtr;
 } // computeCostObject(...)
-
 // -----------------------------------------------------------------------
 // Use this routine to find a basic cost object to share.
 // INPUT:
@@ -4248,6 +4229,8 @@ FileScanOptimizer::optimize(SearchKey*& searchKeyPtr   /* out */
 
       DCMPASSERT(mdamDisjunctsKeyPtr != NULL);
 
+      MDAM_DEBUG0(MTL2, "call computeCostForMultipleSubset()");
+
       mdamTypeIsCommon = FALSE;
       mdamDisjunctsCostPtr =
         computeCostForMultipleSubset
@@ -6902,7 +6885,7 @@ FileScanOptimizer::oldComputeCostForMultipleSubset
 	  }
 
 	  MDAM_DEBUGX(MTL2,
-	    MdamTrace::printBasicCost(this, prefixFR, prefixLR, "Cost for prefix:"));
+	    MdamTrace::printBasicCost(this, prefixFR, prefixLR, "Cost for prefix in oldComputeCostForMultipleSubset():"));
 
 
           if (proceedViaCosting)
@@ -9131,9 +9114,18 @@ void MDAMOptimalDisjunctPrefixWA::processNonLeadingColumn()
   // $$$ times the previous UEC
   // $$$ If the hists can handle IN lists then
   // $$$ the next line is correct.
-  uecForPreviousCol_ = disjunctHistograms_.getColStatsForColumn
-    ( optimizer_.getIndexDesc()->getIndexKey()[prefixColumnPosition_-1] ).
-    getTotalUec().getCeiling();
+              
+  MDAM_DEBUG1(MTL2, "processNonLeadingColumn(), prefixCOlumnPosition_-1: %d:", prefixColumnPosition_-1);
+
+  const ColStats& colStats = disjunctHistograms_.getColStatsForColumn
+    ( optimizer_.getIndexDesc()->getIndexKey()[prefixColumnPosition_-1] );
+
+  MDAM_DEBUGX(MTL2, colStats.print());
+
+  uecForPreviousCol_ = colStats.getTotalUec().getCeiling();
+
+  MDAM_DEBUG1(MTL2, "processNonLeadingColumn(), total UEC, uecForPreviousCol_: %f:", uecForPreviousCol_.value());
+      
 
   CostScalar estimatedUec = csOne;
 
@@ -9145,9 +9137,14 @@ void MDAMOptimalDisjunctPrefixWA::processNonLeadingColumn()
 				 prefixColumnPosition_ - 1, /*in*/
 				 estimatedUec /*out*/))
     {
+
       uecForPreviousCol_ = MIN_ONE(
 	(uecForPreviousCol_ / uecForPreviousColBeforeAppPreds_)
 	*estimatedUec);
+
+      MDAM_DEBUG1(MTL2, "processNonLeadingColumn(), MC uec : %f:", estimatedUec.value());
+      MDAM_DEBUG1(MTL2, "processNonLeadingColumn(), modify by MC uec, uecForPreviousCol_: %f:", uecForPreviousCol_.value());
+
       uecForPreviousColBeforeAppPreds_ = estimatedUec;
     }
   sumOfUecsSoFar_ += uecForPreviousColBeforeAppPreds_;
@@ -9380,10 +9377,17 @@ void MDAMOptimalDisjunctPrefixWA::updatePositions()
 	  // case, don't multiply:
 	  if (uecForPreviousCol_.isGreaterThanZero())
 	    {
-	      prefixSubsets_ *= uecForPreviousCol_;
+              MDAM_DEBUG1(MTL2, "updatePositions(), prefixSubsets_: %f:", prefixSubsets_.value());
+              MDAM_DEBUG1(MTL2, "updatePositions(), uecForPreviousCol_: %f:", uecForPreviousCol_.value());
+
+              prefixSubsets_ *= uecForPreviousCol_;
+              MDAM_DEBUG1(MTL2, "updatePositions(), updated prefixSubsets_: %f:", prefixSubsets_.value());
+
 	      prefixSubsets_ =
 		MINOF(innerRowsUpperBound_.getValue(),
 		      prefixSubsets_.getValue());
+
+              MDAM_DEBUG1(MTL2, "updatePositions(), updated prefixSubsets_ bounded: %f:", prefixSubsets_.value());
 	    }
 	  if( uecForPrevColForSeeks_.isGreaterThanZero())
 	    {
@@ -9631,7 +9635,7 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
     } // if scanForcePtr
 
   MDAM_DEBUGX(MTL2,
-    MdamTrace::printBasicCost(&optimizer_, prefixFR, prefixLR, "Cost for prefix:"));
+    MdamTrace::printBasicCost(&optimizer_, prefixFR, prefixLR, "Cost for prefix in updateMinPrefix():"));
   if (proceedViaCosting)
     {
       // Mdam has not been forced, or forced but with the choice
@@ -9654,12 +9658,16 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
         // we should not take MIN (CostA, CostB) for the second case, it should be the cost to apply 
         // last key predicate. 
         //     Total Mdam Cost = CostB
+        //
+        //  This is a heuristics in that we unconditionally include the last key column 
+        //  with IN list (OR preds) predicate without going through the cost comparison 
+        //  step.
         if ( (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
-             (containsINListOrRanges(&optimizer_.getDisjuncts())) &&
-             ( prefixColumnPosition_ == (lastColumnPosition_ - 1) )
-            )
+              optimizer_.getDisjuncts().containsOrPredsInRanges() &&
+              prefixColumnPosition_ == (lastColumnPosition_ - 1) 
+           ) {
           newMinimumFound = TRUE;
-        else
+        } else
           newMinimumFound = (pMinCost_ == NULL) ? TRUE :
             (scmCost->scmCompareCosts(*pMinCost_) == LESS);
       }
@@ -9692,6 +9700,11 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
       firstRound_ = FALSE;
       optRows_ = prefixRows_;
       optRqsts_ = prefixRqsts_;
+
+      MDAM_DEBUG1(MTL2, "<<<<<Update optRqsts_ as %f\n", prefixRqsts_.value());
+      MDAM_DEBUG1(MTL2, "prefixColumnPosition_ =%d\n", prefixColumnPosition_);
+      MDAM_DEBUG1(MTL2, "newMinimumFound=%d\n", newMinimumFound);
+
       optRqstsForSubsetBoundaries_ = prefixRqstsForSubsetBoundaries_;
       optSeeks_ = prefixSeeks_;
       optSeqKBRead_ = prefixKBRead_;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/00c46394/core/sql/optimizer/disjuncts.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/disjuncts.h b/core/sql/optimizer/disjuncts.h
index ed423e5..63ceeea 100644
--- a/core/sql/optimizer/disjuncts.h
+++ b/core/sql/optimizer/disjuncts.h
@@ -49,6 +49,9 @@
 // array class. It always contains at least one element.
 // -----------------------------------------------------------------------
 
+
+typedef NABoolean (*funcPtrT)(ItemExpr*);
+
 class Disjuncts : public NABasicObject // Abstract class
 {
 public:
@@ -70,6 +73,9 @@ public:
   const ValueIdSet& getCommonPredicates() const
   { return commonPredicates_; }
 
+  NABoolean containsOrPredsInRanges() const;
+  NABoolean containsAndorOrPredsInRanges() const;
+
 protected:
   // The intersection of all the disjuncts in the *local* disjunct array:
   virtual void computeCommonPredicates();
@@ -77,6 +83,8 @@ protected:
   // one entry: the empty disjunct
   DisjunctArray * createEmptyDisjunctArray() const;
 
+  NABoolean containsSomePredsInRanges(funcPtrT funcP) const;
+
 private:
   // the intersection of all the predicates in the disjunct
   ValueIdSet commonPredicates_;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/00c46394/core/sql/optimizer/mdam.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/mdam.cpp b/core/sql/optimizer/mdam.cpp
index ab0f1a4..82c7e5e 100644
--- a/core/sql/optimizer/mdam.cpp
+++ b/core/sql/optimizer/mdam.cpp
@@ -2101,6 +2101,74 @@ DisjunctArray * Disjuncts::createEmptyDisjunctArray() const
   return disjunctArrayPtr;
 }
 
+NABoolean isOrItemExpr(ItemExpr* iePtr)
+{
+   return (iePtr && iePtr->getOperatorType() == ITM_OR);
+}
+
+NABoolean isAndOrItemExpr(ItemExpr* iePtr)
+{
+   return (iePtr &&
+           (iePtr->getOperatorType() == ITM_AND || 
+            iePtr->getOperatorType() == ITM_OR)
+           )
+           ;
+}
+
+NABoolean Disjuncts::containsSomePredsInRanges(funcPtrT funcP) const
+{
+//  CollIndex order;
+//  CollIndex numOfKeyCols=keyPredsByCol.entries();
+//  KeyColumns::KeyColumn::KeyColumnType typeOfRange = KeyColumns::KeyColumn::EMPTY;
+  Disjunct disjunct;
+  ValueIdSet vs;
+  for (CollIndex i=0; i < entries(); i++)
+  {
+    this->get(disjunct,i);
+    vs = disjunct.getAsValueIdSet();
+    for (ValueId predId = vs.init();
+	 vs.next(predId);
+	 vs.advance(predId) )
+    {
+      if(predId.getItemExpr()->getOperatorType() == ITM_RANGE_SPEC_FUNC )
+      {
+	if ((*funcP)(predId.getItemExpr()->child(1)))
+	  return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+NABoolean Disjuncts::containsOrPredsInRanges() const
+{
+   return containsSomePredsInRanges(isOrItemExpr);
+}
+
+NABoolean Disjuncts::containsAndorOrPredsInRanges() const
+{
+   return containsSomePredsInRanges(isAndOrItemExpr);
+  /*
+  if (keyPredsByCol.containsPredicates())
+  {
+    for (order = 0; order < numOfKeyCols; order++)
+    {
+      if (keyPredsByCol.getPredicateExpressionPtr(order) != NULL)
+        typeOfRange = keyPredsByCol.getPredicateExpressionPtr(order)->getType();
+      else
+      	typeOfRange = KeyColumns::KeyColumn::EMPTY;
+      if (typeOfRange == KeyColumns::KeyColumn::INLIST )
+	//  ||
+	//  typeOfRange != KeyColumns::KeyColumn::RANGE)
+	return TRUE;
+    } // end of for-loop
+
+  } // if (containsPredicates())
+  */
+  //return FALSE;
+}
+
+
 //---------------------------------------------------------
 // Methods for class DisjunctsDisjuncts                             |
 //---------------------------------------------------------


Mime
View raw message