Return-Path: X-Original-To: apmail-trafodion-commits-archive@www.apache.org Delivered-To: apmail-trafodion-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id BF7BE184AF for ; Thu, 10 Dec 2015 23:23:19 +0000 (UTC) Received: (qmail 69409 invoked by uid 500); 10 Dec 2015 23:23:19 -0000 Delivered-To: apmail-trafodion-commits-archive@trafodion.apache.org Received: (qmail 69379 invoked by uid 500); 10 Dec 2015 23:23:19 -0000 Mailing-List: contact commits-help@trafodion.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: commits@trafodion.apache.org Delivered-To: mailing list commits@trafodion.apache.org Received: (qmail 69370 invoked by uid 99); 10 Dec 2015 23:23:19 -0000 Received: from Unknown (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Dec 2015 23:23:19 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id 29808CBD44 for ; Thu, 10 Dec 2015 23:23:19 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 1.771 X-Spam-Level: * X-Spam-Status: No, score=1.771 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, T_RP_MATCHES_RCVD=-0.01, URIBL_BLOCKED=0.001] autolearn=disabled Received: from mx1-us-west.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id aDohXStlkIrp for ; Thu, 10 Dec 2015 23:23:12 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-us-west.apache.org (ASF Mail Server at mx1-us-west.apache.org) with SMTP id 3A7DD265D8 for ; Thu, 10 Dec 2015 23:23:12 +0000 (UTC) Received: (qmail 69099 invoked by uid 99); 10 Dec 2015 23:23:12 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Dec 2015 23:23:12 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id BADBCE17E6; Thu, 10 Dec 2015 23:23:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: hzeller@apache.org To: commits@trafodion.incubator.apache.org Date: Thu, 10 Dec 2015 23:23:11 -0000 Message-Id: <5dbebd90a6b54aaf9bb1e43d36c3ca08@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/7] incubator-trafodion git commit: [TRAFODION-1695] Optimize ORDER BY and GROUP BY with salt and divisioning Repository: incubator-trafodion Updated Branches: refs/heads/master 99091d7a3 -> d61ac8a1c [TRAFODION-1695] Optimize ORDER BY and GROUP BY with salt and divisioning For queries that access only a single salt bucket or a single division, we should be able to produce the rows of the table in order of the primary key. This change adds some optimizations to do that. Predicates on computed columns are used to find queries that access only a single salt or division value. The method to match actual with required orders recognizes these predicates and also stores them in the group attributes, so that later checks again recognize the optimization. Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/680900cd Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/680900cd Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/680900cd Branch: refs/heads/master Commit: 680900cd6e2f5daa959b449d12cda3aa7d8a7853 Parents: 07b73d9 Author: Hans Zeller Authored: Tue Sep 8 20:48:05 2015 +0000 Committer: Hans Zeller Committed: Wed Dec 9 02:47:37 2015 +0000 ---------------------------------------------------------------------- core/sql/common/OperTypeEnum.h | 23 +-- core/sql/optimizer/GroupAttr.cpp | 153 ++++++++++++++++++++ core/sql/optimizer/GroupAttr.h | 4 + core/sql/optimizer/ImplRule.cpp | 16 +-- core/sql/optimizer/ItemConstr.h | 39 ++++++ core/sql/optimizer/ItemExpr.cpp | 36 ++++- core/sql/optimizer/ItemExpr.h | 4 +- core/sql/optimizer/OptPhysRelExpr.cpp | 3 +- core/sql/optimizer/PhyProp.cpp | 2 +- core/sql/optimizer/ValueDesc.cpp | 194 ++++++++++++++++---------- core/sql/optimizer/ValueDesc.h | 14 +- core/sql/regress/compGeneral/EXPECTED071 | 47 +++++++ core/sql/regress/compGeneral/TEST071 | 22 +++ core/sql/regress/seabase/EXPECTED010 | 84 +++++++++++ core/sql/regress/seabase/TEST010 | 4 + core/sql/regress/seabase/TEST014 | 1 + 16 files changed, 548 insertions(+), 98 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/common/OperTypeEnum.h ---------------------------------------------------------------------- diff --git a/core/sql/common/OperTypeEnum.h b/core/sql/common/OperTypeEnum.h index d6bb8cf..6e5d2ae 100644 --- a/core/sql/common/OperTypeEnum.h +++ b/core/sql/common/OperTypeEnum.h @@ -715,27 +715,28 @@ enum OperatorTypeEnum { ITM_REF_CONSTRAINT = 2503, ITM_UNIQUE_OPT_CONSTRAINT = 2504, ITM_FUNC_DEPEND_CONSTRAINT = 2505, - ITM_REF_OPT_CONSTRAINT = 2506, - ITM_COMP_REF_OPT_CONSTRAINT = 2507, + ITM_CHECK_OPT_CONSTRAINT, + ITM_REF_OPT_CONSTRAINT, + ITM_COMP_REF_OPT_CONSTRAINT, // lookup a column in a native hbase table being accessed in row format - ITM_HBASE_COLUMN_LOOKUP = 2508, + ITM_HBASE_COLUMN_LOOKUP, // display hbase columns being accessed in row format - ITM_HBASE_COLUMNS_DISPLAY = 2509, + ITM_HBASE_COLUMNS_DISPLAY, - ITM_HBASE_COLUMN_CREATE = 2510, + ITM_HBASE_COLUMN_CREATE, // generate sequence numbers - ITM_SEQUENCE_VALUE = 2511, + ITM_SEQUENCE_VALUE, // return number of the row being returned. Starts at 1 - ITM_ROWNUM = 2512, + ITM_ROWNUM, - ITM_HBASE_TIMESTAMP = 2513, - ITM_HBASE_TIMESTAMP_REF = 2514, - ITM_HBASE_VERSION = 2515, - ITM_HBASE_VERSION_REF = 2516, + ITM_HBASE_TIMESTAMP, + ITM_HBASE_TIMESTAMP_REF, + ITM_HBASE_VERSION, + ITM_HBASE_VERSION_REF, // list of item expressions ITM_ITEM_LIST = 2550, http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/GroupAttr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/GroupAttr.cpp b/core/sql/optimizer/GroupAttr.cpp index 90989be..d027745 100644 --- a/core/sql/optimizer/GroupAttr.cpp +++ b/core/sql/optimizer/GroupAttr.cpp @@ -327,6 +327,38 @@ void GroupAttributes::addConstraint(ItemExpr *c) } break; + case ITM_CHECK_OPT_CONSTRAINT: + { + CheckOptConstraint *cc = (CheckOptConstraint *) c; + + // check existing uniqueness constraints whether they are similar + // and combine uniqueness constraints if possible + for (ValueId occ = constraints_.init(); + constraints_.next(occ); + constraints_.advance(occ)) + { + if (occ.getItemExpr()->getOperatorType() == + ITM_CHECK_OPT_CONSTRAINT) + { + const ValueIdSet &occPreds = + ((CheckOptConstraint *) occ.getItemExpr())->getCheckPreds(); + + if (occPreds.contains(cc->getCheckPreds())) + { + // this is no news, delete this useless new constraint + duplicateConstraint = TRUE; + } + else if(cc->getCheckPreds().contains(cc->getCheckPreds())) + { + // we are improving an existing check constraint, + // take the existing one out + constraints_ -= occ; + } + } // this is an existing uniqueness constraint + } // for each existing constraint + } + break; + case ITM_UNIQUE_CONSTRAINT: DCMPASSERT("Wrong constraint type used in GA" == 0); // LCOV_EXCL_LINE break; @@ -580,6 +612,127 @@ void GroupAttributes::addSuitableCompRefOptConstraints( } // loop over constraints } +// a const method for validating eliminated columns in a sort order, +// usually called for validating an earlier result created with +// the next method below, tryToEliminateOrderColumnBasedOnEqualsPred() +NABoolean GroupAttributes::canEliminateOrderColumnBasedOnEqualsPred( + ValueId col) const +{ + // cast away const-ness and call the non-const method without + // predicates, which will mean it won't side-effect "this" + return const_cast(this)-> + tryToEliminateOrderColumnBasedOnEqualsPred(col, NULL); +} + +// This and the previous method are used to match required sort orders +// or arrangements to an actual key ordering of a table, in cases +// where some columns are equated to a constant, like this, with the +// clustering key being (a,b,c) +// +// select ... +// from t +// where a = 5 +// order by b +// +// Note that for VEGs, this is handled differently (see +// ValueIdList::satisfiesReqdOrder), this code is only for non-VEG +// cases (usually computed columns, also varchar). +// +// If we eliminate a column based on a supplied predicate, then +// this predicate is added as an Optimizer check constraint to the +// group attributes, so that future calls will continue to accept +// the simplified sort order. +NABoolean GroupAttributes::tryToEliminateOrderColumnBasedOnEqualsPred( + ValueId col, + const ValueIdSet *preds) +{ + NABoolean result = FALSE; + + if (preds || hasConstraintOfType(ITM_CHECK_OPT_CONSTRAINT)) + { + // Comparison failed. If the caller provided predicates, + // then we can try something similar to what we did with + // group attributes above. If the predicate equate the + // column with a constant, then we can eliminate it as + // well. Note that we don't expect the requirements to + // omit such columns, since the parent will usually not + // know about such predicates, so we check this condition + // only after trying the regular method. + // + // This situation typically happens with computed columns + // where predicates are added after VEGs are formed + + ValueIdSet checkConstraints; + ValueIdSet checkPreds; + + // look for predicates we remembered from earlier calls + getConstraintsOfType(ITM_CHECK_OPT_CONSTRAINT, + checkConstraints); + + for (ValueId c = checkConstraints.init(); + checkConstraints.next(c); + checkConstraints.advance(c)) + checkPreds += static_cast( + c.getItemExpr())->getCheckPreds(); + + // also use newly provided predicates + if (preds) + checkPreds += *preds; + + // if the column is descending, then get rid of the Inverse + // operator for the next check + if (col.getItemExpr()->getOperatorType() == ITM_INVERSE) + col = col.getItemExpr()->child(0).getValueId(); + + // convert col from a VEGRef to a base column, if needed, + // the ScanKey method below wants a real column as input + if (col.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE) + { + const ValueIdSet &vegMembers = + static_cast(col.getItemExpr())-> + getVEG()->getAllValues(); + for (ValueId b=vegMembers.init(); + vegMembers.next(b); + vegMembers.advance(b)) + if (b.getItemExpr()->getOperatorType() == ITM_BASECOLUMN) + col = b; + } + + for (ValueId p = checkPreds.init(); + checkPreds.next(p); + checkPreds.advance(p)) + { + ValueId dummy1, dummy2; + + if (p.getItemExpr()->getOperatorType() == ITM_EQUAL && + ScanKey::isAKeyPredicateForColumn( + p, + dummy1, dummy2, + col, + getCharacteristicInputs())) + { + // this is a predicate of the form col = const + // and col is our current ValueId in tempThis, + // therefore skip over it and try again + result = TRUE; + + // if we used a newly provided predicate, then + // remember it in the constraints, so that when + // the search engine validates our physical + // property later, it will come to the same + // conclusion + if (preds && preds->contains(p)) + addConstraint( + new(CmpCommon::statementHeap()) CheckOptConstraint( + ValueIdSet(p))); + } + } + } + + return result; +} + + // ----------------------------------------------------------------------- // Low-level utility for merging Group Attributes. // ----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/GroupAttr.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/GroupAttr.h b/core/sql/optimizer/GroupAttr.h index bcb3861..0eb9d3f 100644 --- a/core/sql/optimizer/GroupAttr.h +++ b/core/sql/optimizer/GroupAttr.h @@ -246,6 +246,10 @@ public: Cardinality getMaxNumOfRows() const { Cardinality x,y; hasCardConstraint(x,y); return y; } + NABoolean canEliminateOrderColumnBasedOnEqualsPred(ValueId col) const; + NABoolean tryToEliminateOrderColumnBasedOnEqualsPred(ValueId col, + const ValueIdSet *preds); + // --------------------------------------------------------------------- // Methods on group persistent estimates // --------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/ImplRule.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ImplRule.cpp b/core/sql/optimizer/ImplRule.cpp index c1ea07c..9f9f84a 100644 --- a/core/sql/optimizer/ImplRule.cpp +++ b/core/sql/optimizer/ImplRule.cpp @@ -1296,17 +1296,16 @@ RelExpr * generateScanSubstitutes(RelExpr * before, if (resultNeedsToBeOrdered) { ValueIdList sortKey = idesc->getOrderOfKeyValues(); - // Remove from the sort key any columns that are covered by - // constants or input values. - sortKey.removeCoveredExprs( - before->getGroupAttr()->getCharacteristicInputs()); - // make sure it satisfies the required order and also - // determine the scan direction + // Make sure it satisfies the required order and also + // determine the scan direction. If computed column predicates + // select only one salt or division value, for example, + // then make use of this to satisfy order requirements. if ((rppForMe->getSortKey() != NULL) AND ((oc = sortKey.satisfiesReqdOrder( *rppForMe->getSortKey(), - before->getGroupAttr())) == DIFFERENT_ORDER)) + before->getGroupAttr(), + &bef->getComputedPredicates())) == DIFFERENT_ORDER)) indexQualifies = FALSE; // hbase does not support inverse order scan @@ -1318,7 +1317,8 @@ RelExpr * generateScanSubstitutes(RelExpr * before, (rppForMe->getArrangedCols() != NULL) AND NOT sortKey.satisfiesReqdArrangement( *rppForMe->getArrangedCols(), - before->getGroupAttr())) + before->getGroupAttr(), + &bef->getComputedPredicates())) indexQualifies = FALSE; } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/ItemConstr.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ItemConstr.h b/core/sql/optimizer/ItemConstr.h index b248515..147a0c7 100644 --- a/core/sql/optimizer/ItemConstr.h +++ b/core/sql/optimizer/ItemConstr.h @@ -376,6 +376,45 @@ private: }; // ----------------------------------------------------------------------- +// Internal check constraint, e.g. a predicate used in synthesizing +// physical properties like a sort order. These may be needed to validate +// that a given sort order does indeed satisfy a requirement. +// ----------------------------------------------------------------------- +class CheckOptConstraint : public OptConstraint +{ + // ITM_CHECK_OPT_CONSTRAINT +public: + + // ctor for unique constraints discovered by the optimizer + CheckOptConstraint(const ValueIdSet &checkPreds) + : OptConstraint(ITM_CHECK_OPT_CONSTRAINT), checkPreds_(checkPreds) {} + + // convert the other kind of u.c. into this internal kind + //##useful extra info for Optimizer at some future point... + + virtual ~CheckOptConstraint(); + + // get the degree of this node (it is a leaf). + virtual Int32 getArity() const; + + virtual ItemExpr * copyTopNode(ItemExpr *derivedNode = NULL, + CollHeap* outHeap = 0); + + // accessor functions + const ValueIdSet &getCheckPreds() { return checkPreds_; } + + // get a printable string that identifies the operator + const NAString getText() const; + void unparse(NAString &result,PhaseEnum phase,UnparseFormatEnum form, + TableDesc * tabId = NULL) const; + +private: + + ValueIdSet checkPreds_; + +}; // CheckOptConstraint + +// ----------------------------------------------------------------------- // // Auxiliary classes and definitions used by the // referential integrity constraints, UniqueConstraint and RefConstraint. http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/ItemExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp index 3bc1dcd..cde3298 100644 --- a/core/sql/optimizer/ItemExpr.cpp +++ b/core/sql/optimizer/ItemExpr.cpp @@ -714,7 +714,7 @@ ItemExpr* ItemExpr::getConstantInVEG() ItemExpr * ItemExpr::createMirrorPred(ItemExpr *compColPtr, ItemExpr * compColExprPtr, - ValueIdSet underlyingCols) + const ValueIdSet &underlyingCols) { CMPASSERT(compColPtr->getOperatorType() == ITM_BASECOLUMN); ValueIdSet eics = ((BaseColumn *)compColPtr)->getEIC(); @@ -5945,6 +5945,40 @@ void FuncDependencyConstraint::unparse(NAString &result, // ----------------------------------------------------------------------- +// member functions for class CheckOptConstraint +// ----------------------------------------------------------------------- +CheckOptConstraint::~CheckOptConstraint() {} + +Int32 CheckOptConstraint::getArity() const { return 0; } + +ItemExpr * CheckOptConstraint::copyTopNode(ItemExpr *derivedNode, + CollHeap* outHeap) +{ + ItemExpr *result; + + if (derivedNode == NULL) + result = new (outHeap) CheckOptConstraint(checkPreds_); + else + result = derivedNode; + + return OptConstraint::copyTopNode(result, outHeap); +} + +const NAString CheckOptConstraint::getText() const +{ + return "CheckOptConstraint"; +} + +void CheckOptConstraint::unparse(NAString &result, + PhaseEnum phase, + UnparseFormatEnum form, + TableDesc * tabId) const +{ + result += "CheckOptConstraint"; + checkPreds_.unparse(result,phase,form); +} + +// ----------------------------------------------------------------------- // member functions for class RefOptConstraint // ----------------------------------------------------------------------- Int32 RefOptConstraint::getArity() const { return 0;} http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/ItemExpr.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ItemExpr.h b/core/sql/optimizer/ItemExpr.h index 71518e4..c8364a1 100644 --- a/core/sql/optimizer/ItemExpr.h +++ b/core/sql/optimizer/ItemExpr.h @@ -601,8 +601,8 @@ public: ItemExpr* constFold(); ItemExpr * createMirrorPred(ItemExpr *compColPtr, - ItemExpr *compColExprPtr, - ValueIdSet underlyingCols); + ItemExpr *compColExprPtr, + const ValueIdSet &underlyingCols); //Does 'this' ItemExpr evaluate to constant e.g. //If we go by the strict definition then //Sin(Cos(1)+1) will return TRUE. http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/OptPhysRelExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/OptPhysRelExpr.cpp b/core/sql/optimizer/OptPhysRelExpr.cpp index e31c295..35229aa 100644 --- a/core/sql/optimizer/OptPhysRelExpr.cpp +++ b/core/sql/optimizer/OptPhysRelExpr.cpp @@ -15171,7 +15171,8 @@ FileScan::synthPhysicalProperty(const Context* myContext, // --------------------------------------------------------------------- sortOrderVEG.removeCoveredExprs(getGroupAttr()->getCharacteristicInputs()); sortOrderVEG.complifyAndRemoveUncoveredSuffix( - getGroupAttr()->getCharacteristicOutputs()) ; + getGroupAttr()->getCharacteristicOutputs(), + getGroupAttr()); // --------------------------------------------------------------------- // if this is a reverse scan, apply an inversion function to http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/PhyProp.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/PhyProp.cpp b/core/sql/optimizer/PhyProp.cpp index 6860b3e..1262a9a 100644 --- a/core/sql/optimizer/PhyProp.cpp +++ b/core/sql/optimizer/PhyProp.cpp @@ -262,7 +262,7 @@ PhysicalProperty::enforceCoverageByGroupAttributes (const GroupAttributes * grou // complifyAndRemoveUncoveredSuffix() had only characteristic output // in the argument. Now characteristic input is also added. coveringSet += groupAttr->getCharacteristicOutputs(); - newSortKey.complifyAndRemoveUncoveredSuffix(coveringSet); + newSortKey.complifyAndRemoveUncoveredSuffix(coveringSet, groupAttr); if (newSortKey.isEmpty()) { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/ValueDesc.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ValueDesc.cpp b/core/sql/optimizer/ValueDesc.cpp index a23d2c9..372da1e 100644 --- a/core/sql/optimizer/ValueDesc.cpp +++ b/core/sql/optimizer/ValueDesc.cpp @@ -931,7 +931,8 @@ ValueIdList ValueIdList::findNJEquiJoinCols( // Otherwise, return the length of the prefix seen so far. If there // were no matches, then we will return 0. // ----------------------------------------------------------------------- -Int32 ValueIdList::complifyAndCheckPrefixCovered (const ValueIdSet& vidSet) +Int32 ValueIdList::complifyAndCheckPrefixCovered (const ValueIdSet& vidSet, + const GroupAttributes *ga) { // if the set's empty, clearly it doesn't cover anything! if ( vidSet.entries() == 0 ) return 0 ; @@ -1024,6 +1025,12 @@ Int32 ValueIdList::complifyAndCheckPrefixCovered (const ValueIdSet& vidSet) (*this)[i] = inverseCol->getValueId(); } } // end if the simp. set contains the simp. expresssion + else if (ga && ga->canEliminateOrderColumnBasedOnEqualsPred(thisVid)) + { + // if this column is constrained to a single value, then + // simply remove it from "this" + removeAt(i--); + } else return i; } @@ -1040,10 +1047,11 @@ Int32 ValueIdList::complifyAndCheckPrefixCovered (const ValueIdSet& vidSet) // corresponding versions in the provided set. // For the remaining suffix, remove those items from this list. // --------------------------------------------------------------------- -void ValueIdList::complifyAndRemoveUncoveredSuffix (const ValueIdSet& vidSet) +void ValueIdList::complifyAndRemoveUncoveredSuffix (const ValueIdSet& vidSet, + const GroupAttributes *ga) { // last covered id in this list - Int32 index = complifyAndCheckPrefixCovered(vidSet); + Int32 index = complifyAndCheckPrefixCovered(vidSet, ga); Int32 i = (Int32)entries() - 1; // index of last key column // Remove all entries following the index @@ -1263,52 +1271,72 @@ void ValueIdList::replaceVEGExpressions } // ValueIdList::replaceVEGExpressions() OrderComparison ValueIdList::satisfiesReqdOrder(const ValueIdList & reqdOrder, - const GroupAttributes *) const + GroupAttributes *ga, + const ValueIdSet *preds) const { - NAUnsigned numCols = reqdOrder.entries(); + NAUnsigned numReqdEntries = reqdOrder.entries(); OrderComparison allCols = SAME_ORDER; // sort order comparison of all columns together OrderComparison oneCol; // one column compared with its counterpart - // is the actual sort key as long as the required one? - // - // Note the following comparison does not consider duplications. - // such as this=[a, c] and reqOrder=[a, a, c] - // See OptPhysRelExpr.cpp for a fix for a case regarding union sort keys - // (search for 10-020913-1676) - if (entries() < numCols) - return DIFFERENT_ORDER; - - //Orignal condition: if (!OSIM_isNTbehavior() || (OSIM_isNTbehavior() && OSIM_runningSimulation())) - //For new OSIM, there won't be OSIM_isNTbehavior(), OSIM_isLinuxbehavior() or OSIM_isNSKbehavior(), - //we will assume OSIM_isLinuxbehavior() is always TRUE. - // - //The else block below never gets executed, - //if it is to be executed under certain conditions, - //please separate it from this if-conditon. - if (TRUE) - { - ItemExpr *ie = (*this)[0].getItemExpr(); - ItemExpr * reqdExpr = reqdOrder[0].getItemExpr(); - CollIndex i = 0; - CollIndex j = 0; - CollIndex numEntries = entries(); + CollIndex thisIx = 0; + CollIndex reqdIx = 0; + + ValueIdList tempThis(*this); + + // if group attributes are provided, remove any constant expressions, + // since those should not appear in the requirements and since they + // are not relevant to the ordering + if (ga) + tempThis.removeCoveredExprs(ga->getCharacteristicInputs()); + + CollIndex numActEntries = tempThis.entries(); - if (numEntries < numCols) + // we assume that reqdOrder went through a RequirementGenerator + // and is therefore already optimized (e.g. no duplicate columns) + + // is the actual sort key at least as long as the required one? + if (numActEntries < numReqdEntries) return DIFFERENT_ORDER; - for (CollIndex startCol = 0; startCol < numCols; startCol++) + for (CollIndex reqdIx = 0; reqdIx < numReqdEntries; reqdIx++) { - if ((reqdOrder[j] == (*this)[i])) - { - oneCol = SAME_ORDER; - } - else + NABoolean done = FALSE; + + // compare the next required order column with one or more + // ValueIds in tempThis + while (!done) { - oneCol = reqdOrder[j].getItemExpr()-> - sameOrder((*this)[i].getItemExpr()); + // make sure we have enough columns in "tempThis" + if (thisIx >= numActEntries) + return DIFFERENT_ORDER; + + // compare the next column of tempThis with the required order + oneCol = reqdOrder[reqdIx].getItemExpr()->sameOrder( + tempThis[thisIx].getItemExpr()); + + done = TRUE; + + // If we didn't find the column, then try to find an equals + // predicate that equates the column to a constant, so that + // we can skip over it. This typically happens with computed + // columns such as salt or division that don't have associated + // VEGs, which are handled by tempThis.removeCoveredExprs() above. + if (oneCol == DIFFERENT_ORDER && + ga && + (numActEntries-thisIx) > (numReqdEntries-reqdIx) && // extra cols in this + ga->tryToEliminateOrderColumnBasedOnEqualsPred(tempThis[thisIx], + preds)) + { + // this is a predicate of the form col = const + // and col is our current ValueId in tempThis, + // therefore skip over it and try again + thisIx++; + done = FALSE; + } } - if (startCol == 0) + + if (reqdIx == 0) allCols = oneCol; else allCols = combineOrderComparisons(allCols,oneCol); @@ -1316,43 +1344,15 @@ OrderComparison ValueIdList::satisfiesReqdOrder(const ValueIdList & reqdOrder, if (allCols == DIFFERENT_ORDER) return allCols; - i++; j++; - } - } - else{ - for (CollIndex i = 0; i < numCols; i++) - { - if ((reqdOrder[i] == (*this)[i])) - { - oneCol = SAME_ORDER; - } - else - { - // if the expressions are not exactly the same, try to match similar - // expressions that result in the same sorting order, like a and a+1 - oneCol = reqdOrder[i].getItemExpr()-> - sameOrder((*this)[i].getItemExpr()); - } - - // the first column determines whether we do the same order or the - // inverse order or nothing, all other columns have to follow that - // decision - if (i == 0) - allCols = oneCol; - else - allCols = combineOrderComparisons(allCols,oneCol); - - // did we already loose it? - if (allCols == DIFFERENT_ORDER) - return allCols; - } + thisIx++; } return allCols; } NABoolean ValueIdList::satisfiesReqdArrangement( const ValueIdSet &reqdArrangement, - const GroupAttributes * ga) const + GroupAttributes * ga, + const ValueIdSet *preds) const { // --------------------------------------------------------------------- // check requirement for arrangement of data (check whether the required @@ -1364,6 +1364,12 @@ NABoolean ValueIdList::satisfiesReqdArrangement( NABoolean found = TRUE; CollIndex startCol = 0; + // if group attributes are provided, remove any constant expressions, + // since those should not appear in the requirements and since they + // are not relevant to the ordering + if (ga) + arrCols.removeCoveredExprs(ga->getCharacteristicInputs()); + // walk along the sort key, as long as all columns in the sort // keys are part of the required set of arranged columns @@ -1401,6 +1407,16 @@ NABoolean ValueIdList::satisfiesReqdArrangement( arrCols -= x; } } + + if ((NOT found) && ga) + { + // if this column is constrained to a single value, + // we can skip over it and continue (we set found to + // true but don't remove anything from arrCols) + found = ga->tryToEliminateOrderColumnBasedOnEqualsPred( + (*this)[i], + preds); + } } } @@ -2930,6 +2946,7 @@ ValueIdSet ValueIdSet::createMirrorPreds(ValueId &computedCol, CMPASSERT( iePtr->getOperatorType() == ITM_BASECOLUMN ); ItemExpr *compExpr = ((BaseColumn *) iePtr)->getComputedColumnExpr().getItemExpr(); + ItemExpr *prevPred = NULL; for (ValueId kpv = init(); next(kpv); advance(kpv)) { @@ -2947,7 +2964,44 @@ ValueIdSet ValueIdSet::createMirrorPreds(ValueId &computedCol, { ItemExpr * newPred = piePtr->createMirrorPred(iePtr, compExpr, underlyingCols); if (newPred != NULL) - newComputedPreds += newPred->getValueId(); + { + // look for the following pattern, which is common + // when a range of values falls within one division + // col1 >= const1 AND col1 <= const1 + // and transform this into the single predicate col1 = const1 + if (prevPred) + { + OperatorTypeEnum prevOp = prevPred->getOperatorType(); + OperatorTypeEnum newOp = newPred->getOperatorType(); + NABoolean neg1, neg2; + ConstValue *prevConst = prevPred->child(1)->castToConstValue(neg1); + ConstValue *newConst = newPred->child(1)->castToConstValue(neg2); + + if ((prevOp == ITM_GREATER_EQ && newOp == ITM_LESS_EQ || + newOp == ITM_GREATER_EQ && prevOp == ITM_LESS_EQ) && + prevPred->child(0) == newPred->child(0) && + prevConst && newConst && + prevConst->duplicateMatch(*newConst) && neg1 == neg2 && + static_cast(prevPred)->getSpecialNulls() == + static_cast(newPred)->getSpecialNulls()) + { + // make the <= or >= predicate of the pattern into an = predicate + BiRelat *newEqualPred = new(CmpCommon::statementHeap()) + BiRelat(ITM_EQUAL, + newPred->child(0), + newPred->child(1)); + newEqualPred->setSpecialNulls( + static_cast(newPred)->getSpecialNulls()); + newEqualPred->synthTypeAndValueId(); + + newComputedPreds -= prevPred->getValueId(); + newPred = newEqualPred; + } + } + + newComputedPreds += newPred->getValueId(); + prevPred = newPred; + } break; } case ITM_ASSIGN: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/optimizer/ValueDesc.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ValueDesc.h b/core/sql/optimizer/ValueDesc.h index 7a7a625..50e4634 100644 --- a/core/sql/optimizer/ValueDesc.h +++ b/core/sql/optimizer/ValueDesc.h @@ -469,7 +469,8 @@ public: // list with the one in the original provided set - i.e. "complify" // it. // --------------------------------------------------------------------- - Int32 complifyAndCheckPrefixCovered (const ValueIdSet& vidSet); + Int32 complifyAndCheckPrefixCovered (const ValueIdSet& vidSet, + const GroupAttributes *ga); // --------------------------------------------------------------------- // Check whether a prefix of this list is covered by the provided set. @@ -477,7 +478,8 @@ public: // their counterparts in the provided set. // For the remaining suffix, remove those items from this list. // --------------------------------------------------------------------- - void complifyAndRemoveUncoveredSuffix (const ValueIdSet& vidSet); + void complifyAndRemoveUncoveredSuffix (const ValueIdSet& vidSet, + const GroupAttributes *ga); // --------------------------------------------------------------------- // simplifyOrderExpr() @@ -512,11 +514,15 @@ public: // --------------------------------------------------------------------- // If a table is ordered by the expressions described in this list, // does the ordering satisfy a required order or arrangement of columns + // (GroupAttributes and predicates can be provided to allow more matches + // by applying some optimizations) // --------------------------------------------------------------------- OrderComparison satisfiesReqdOrder(const ValueIdList &reqdOrder, - const GroupAttributes *ga = NULL) const; + GroupAttributes *ga = NULL, + const ValueIdSet *preds = NULL) const; NABoolean satisfiesReqdArrangement(const ValueIdSet &reqdArrangement, - const GroupAttributes *ga = NULL) const; + GroupAttributes *ga = NULL, + const ValueIdSet *preds = NULL) const; // --------------------------------------------------------------------- // Calculate the length of the row containing all the value ids http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/regress/compGeneral/EXPECTED071 ---------------------------------------------------------------------- diff --git a/core/sql/regress/compGeneral/EXPECTED071 b/core/sql/regress/compGeneral/EXPECTED071 index 5fd689b..a5a4d39 100644 --- a/core/sql/regress/compGeneral/EXPECTED071 +++ b/core/sql/regress/compGeneral/EXPECTED071 @@ -668,6 +668,53 @@ TRAFODION_VSBB_DELETE TRAFODION.MTD.MTD2 --- 0 row(s) selected. >> +>>-- test elimination of division column for single-division queries with order by +>>explain options 'f' ++>select * ++>from mtd1 ++>where sale_date between date '2000-01-01' and date '2000-01-21' ++>order by store_id; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.10E+001 +. . 1 trafodion_scan MTD1 1.10E+001 + +--- SQL operation complete. +>>-- expect no sort operator, for a single division, table is sorted on store_id +>> +>>explain options 'f' ++>select * ++>from mtd2 ++>where sale_date between date '2000-01-01' and date '2000-01-21' ++>order by store_id; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.10E+001 +. . 1 trafodion_scan MTD2 1.10E+001 + +--- SQL operation complete. +>>-- expect no sort operator, for a single division, table is sorted on store_id +>> +>>explain options 'f' ++>select * ++>from mtd2 ++>where sale_date between date '2000-01-01' and date '2000-02-21' ++>order by store_id; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +2 . 3 root 1.10E+001 +1 . 2 sort 1.10E+001 +. . 1 trafodion_scan MTD2 1.10E+001 + +--- SQL operation complete. +>>-- expect a sort operator, since we are querying more than one division +>> >>-- some simple join queries, validate min/max optimization for divisioned table >> >>prepare min_max_opt_query from http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/regress/compGeneral/TEST071 ---------------------------------------------------------------------- diff --git a/core/sql/regress/compGeneral/TEST071 b/core/sql/regress/compGeneral/TEST071 index 41dac6b..cf42391 100644 --- a/core/sql/regress/compGeneral/TEST071 +++ b/core/sql/regress/compGeneral/TEST071 @@ -376,6 +376,28 @@ execute s; select *, "_DIVISION_1_" from mtd2 order by 1, 2, 3; +-- test elimination of division column for single-division queries with order by +explain options 'f' +select * +from mtd1 +where sale_date between date '2000-01-01' and date '2000-01-21' +order by store_id; +-- expect no sort operator, for a single division, table is sorted on store_id + +explain options 'f' +select * +from mtd2 +where sale_date between date '2000-01-01' and date '2000-01-21' +order by store_id; +-- expect no sort operator, for a single division, table is sorted on store_id + +explain options 'f' +select * +from mtd2 +where sale_date between date '2000-01-01' and date '2000-02-21' +order by store_id; +-- expect a sort operator, since we are querying more than one division + -- some simple join queries, validate min/max optimization for divisioned table prepare min_max_opt_query from http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/regress/seabase/EXPECTED010 ---------------------------------------------------------------------- diff --git a/core/sql/regress/seabase/EXPECTED010 b/core/sql/regress/seabase/EXPECTED010 index b4cbf66..726905c 100644 --- a/core/sql/regress/seabase/EXPECTED010 +++ b/core/sql/regress/seabase/EXPECTED010 @@ -192,6 +192,20 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD --- SQL operation complete. >> +>>-- should see no sort operator when selecting from one salt bucket +>>prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; + +--- SQL command prepared. +>>explain options 'f' xx; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.00E+000 +. . 1 trafodion_scan T010T3 1.00E+000 + +--- SQL operation complete. +>> >>-- selectPred should present due to the 2nd disjunct b='1' >>prepare xx from select * from t010t1 where a=1 or b='1'; @@ -1843,6 +1857,20 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD --- SQL operation complete. >> +>>-- should see no sort operator when selecting from one salt bucket +>>prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; + +--- SQL command prepared. +>>explain options 'f' xx; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.00E+000 +. . 1 trafodion_scan T010T3 1.00E+000 + +--- SQL operation complete. +>> >>-- selectPred should present due to the 2nd disjunct b='1' >>prepare xx from select * from t010t1 where a=1 or b='1'; @@ -3515,6 +3543,20 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD --- SQL operation complete. >> +>>-- should see no sort operator when selecting from one salt bucket +>>prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; + +--- SQL command prepared. +>>explain options 'f' xx; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.00E+000 +. . 1 trafodion_scan T010T3 1.00E+000 + +--- SQL operation complete. +>> >>-- selectPred should present due to the 2nd disjunct b='1' >>prepare xx from select * from t010t1 where a=1 or b='1'; @@ -5172,6 +5214,20 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD --- SQL operation complete. >> +>>-- should see no sort operator when selecting from one salt bucket +>>prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; + +--- SQL command prepared. +>>explain options 'f' xx; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.00E+000 +. . 1 trafodion_scan T010T3 1.00E+000 + +--- SQL operation complete. +>> >>-- selectPred should present due to the 2nd disjunct b='1' >>prepare xx from select * from t010t1 where a=1 or b='1'; @@ -6835,6 +6891,20 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD --- SQL operation complete. >> +>>-- should see no sort operator when selecting from one salt bucket +>>prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; + +--- SQL command prepared. +>>explain options 'f' xx; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.00E+000 +. . 1 trafodion_scan T010T3 1.00E+000 + +--- SQL operation complete. +>> >>-- selectPred should present due to the 2nd disjunct b='1' >>prepare xx from select * from t010t1 where a=1 or b='1'; @@ -8484,6 +8554,20 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD --- SQL operation complete. >> +>>-- should see no sort operator when selecting from one salt bucket +>>prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; + +--- SQL command prepared. +>>explain options 'f' xx; + +LC RC OP OPERATOR OPT DESCRIPTION CARD +---- ---- ---- -------------------- -------- -------------------- --------- + +1 . 2 root 1.00E+000 +. . 1 trafodion_scan T010T3 1.00E+000 + +--- SQL operation complete. +>> >>-- selectPred should present due to the 2nd disjunct b='1' >>prepare xx from select * from t010t1 where a=1 or b='1'; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/regress/seabase/TEST010 ---------------------------------------------------------------------- diff --git a/core/sql/regress/seabase/TEST010 b/core/sql/regress/seabase/TEST010 index ed89468..4efc187 100644 --- a/core/sql/regress/seabase/TEST010 +++ b/core/sql/regress/seabase/TEST010 @@ -133,6 +133,10 @@ explain options 'f' xx; prepare xx from select * from t010t1 order by b; explain options 'f' xx; +-- should see no sort operator when selecting from one salt bucket +prepare xx from select * from t010t3 where (a,b) = (1,'b') order by c; +explain options 'f' xx; + -- selectPred should present due to the 2nd disjunct b='1' prepare xx from select * from t010t1 where a=1 or b='1'; explain xx; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/680900cd/core/sql/regress/seabase/TEST014 ---------------------------------------------------------------------- diff --git a/core/sql/regress/seabase/TEST014 b/core/sql/regress/seabase/TEST014 index 6ca61fe..3fdab85 100644 --- a/core/sql/regress/seabase/TEST014 +++ b/core/sql/regress/seabase/TEST014 @@ -40,6 +40,7 @@ cqd query_cache '0'; obey TEST014(test_ddl_disable_partition); log; +obey TEST014(clean_up); exit; ?section clean_up