trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sure...@apache.org
Subject [1/6] incubator-trafodion git commit: [TRAFODION-2140] Move IUS feature to open source
Date Wed, 10 Aug 2016 22:59:42 GMT
Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 3cdea3f9b -> dda167cb0


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_globals.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.h b/core/sql/ustat/hs_globals.h
index e3d22e1..10381a8 100644
--- a/core/sql/ustat/hs_globals.h
+++ b/core/sql/ustat/hs_globals.h
@@ -73,7 +73,7 @@ class AbstractFastStatsHist;
 Lng32 AddNecessaryColumns();
 Lng32 AddAllColumnsForIUS();
 
-Lng32 createSampleOption(Lng32 sampleType, double samplePercent, NAString &sampleOpt,
+void createSampleOption(Lng32 sampleType, double samplePercent, NAString &sampleOpt,
                         Int64 sampleValue1=0, Int64 sampleValue2=0);
 Lng32 doubleToHSDataBuffer(const double dbl, HSDataBuffer& dbf);
 Lng32 managePersistentSamples();
@@ -1135,6 +1135,7 @@ struct HSColGroupStruct : public NABasicObject
     Int64            prevUEC;                      /* uec from existing histogram */
     Int64            colSecs;                      /* Time to sort/group data for column */
     CountingBloomFilter* cbf;                      /* A bloom filter for IUS */
+    NAString& cbfFileNameSuffix() { return *colSet[0].colname; }
 
     void* boundaryValues;                          /* List of bounary values for IUS */
     void* MFVValues;                               /* List of MFV values for IUS */
@@ -1157,6 +1158,7 @@ struct HSColGroupStruct : public NABasicObject
                                                    /* are used by its neighbors. Used to compute group weight */
 
     NABoolean allKeysInsertedIntoCBF;
+    Int32            backwardWarningCount;          // for UERR_UNEXPECTED_BACKWARDS_DATA warnings
 
     #ifdef _TEST_ALLOC_FAILURE
     // Stuff used to test memory allocation failures.
@@ -1202,13 +1204,7 @@ class IUSValueIterator
     virtual ~IUSValueIterator()
     {}
     
-    void init(HSColGroupStruct* group)
-    {
-      // Strings must be contiguous in the strData buffer for this iterator to
-      // work correctly.
-      HS_ASSERT(group->strDataConsecutive);
-      vp = (T*)group->data;
-    }
+    void init(HSColGroupStruct* group);
     
     void next()
     {
@@ -1357,6 +1353,9 @@ public:
     void getMemoryRequirementsForOneMCGroup(HSColGroupStruct* group, Int64 rows);
 
     static Int32 allocateMemoryForColumns(HSColGroupStruct* group, Int64 rows, HSColGroupStruct* mgr = NULL /* used for MC IS */);
+    static Int32 allocateMemoryForIUSColumns(HSColGroupStruct* group, Int64 rows,
+                                             HSColGroupStruct* delGroup, Int64 delRows,
+                                             HSColGroupStruct* insGroup, Int64 insRows);
 
     // For internal sort or IUS, remove and count nulls for each column from the
     // rowset just read.
@@ -1484,6 +1483,9 @@ public:
     //Log the current contents of this class.
     void log(HSLogMan* LM);
 
+    // Takes action necessary before throwing exception for an assertion failure.
+    void preAssertionFailure(const char* condition, const char* fileName, Lng32 lineNum);
+
     // Derive a return code from the contents of the diagnostics area.
     Lng32 getRetcodeFromDiags();
 
@@ -1502,14 +1504,20 @@ public:
                                              NABoolean forceToFetch = TRUE);
     Lng32 updatePersistentSampleTableForIUS(NAString& sampleTableName, double sampleRate,
                                             NAString& targetTableName);
+    void generateIUSDeleteQuery(const NAString& smplTable, NAString& queryText);
+    void generateIUSSelectInsertQuery(const NAString& smplTable,
+                                      const NAString& sourceTable,
+                                      NAString& queryText);
+    void getCBFFilePrefix(NAString& sampleTableName, NAString& filePrefix);
     void detectPersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct *group);
+    Lng32 UpdateIUSPersistentSampleTable(Int64 oldSampleSize, Int64 requestedSampleSize, Int64& newSampleSize);
     Lng32 readCBFsIntoMemForIUS(NAString& sampleTableName, HSColGroupStruct* group);
     Lng32 writeCBFstoDiskForIUS(NAString& sampleTableName, HSColGroupStruct* group);
-    Lng32 deletePersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct* group);
+    Lng32 deletePersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct* group, SortState stateToDelete);
 
     void logDiagArea(const char* title);
 
-    Lng32 begin_IUS_work(char* buffer);
+    Lng32 begin_IUS_work();
     Lng32 end_IUS_work();
 
     // Populate the hash table used to determine when a ustat statement has run
@@ -1579,6 +1587,7 @@ public:
     Int64          numPartitions;                  /* # of partns in object   */
     NAString      *hstogram_table;                 /* HISTOGRM table          */
     NAString      *hsintval_table;                 /* HISTINTS table          */
+    NAString      *hsperssamp_table;               /* PERSISTENT_SAMPLES table */
     NAString      *hssample_table;                 /* SAMPLING table          */
     NABoolean      externalSampleTable;            /* ownership of sample tab */
     hs_table_type  tableType;                      /* GUARDIAN | ANSI format  */
@@ -1727,6 +1736,10 @@ private:
                          NABoolean internalSortWhenBetter,
                          NABoolean trySampleTableBypass = FALSE);
 
+    // After an allocation failure, this is called to reduce the amount of
+    // memory we estimate is available.
+    static void memReduceAllowance();
+
     // When a memory allocation fails, return any memory already allocated for
     // the group for internal sort, and set any PENDING columns back to
     // UNPROCESSED state.  This function cannot fail.
@@ -1756,6 +1769,17 @@ private:
 
     // Collect statistics by incrementally updating persistent sample table and
     // possibly histograms as well.
+    Lng32 doIUS(NABoolean& done);
+
+    // Collect stats by incrementally updating histograms where possible. Persistent
+    // sample is also incrementally updated.
+    Lng32 doFullIUS(Int64 currentSampleSize, Int64 futureSampleSize, NABoolean& done);
+
+    // Causes persistent sample table to be incrementally updated, and other
+    // preparatory tasks so RUS can be performed using persistent sample.
+    Lng32 prepareToUsePersistentSample (Int64 currentSampleSize, Int64 futureSampleSize);
+
+    // Incrementally update histograms for a selected batch of columns
     Lng32 CollectStatisticsForIUS(Int64 currentSampleSize, Int64 futureSampleSize);
 
     //
@@ -1798,6 +1822,18 @@ private:
     HSInMemoryTable* iusSampleDeletedInMem;
     HSInMemoryTable* iusSampleInsertedInMem;
 
+    // used by IUS code for clean up purposes
+    NABoolean sampleIExists_;
+
+    // For IUS, once the persistent sample table has been successfully updated
+    // in accordance with the IUS predicate, these ptrs will point to the requested
+    // (expected) and actual number of rows in the sample table. end_IUS_work will
+    // pass these ptrs to the function that updates the sample table's row in
+    // SB_PERSISTENT_SAMPLES. If non-null, the values are used for the corresponding
+    // columns in that table.
+    Int64* PST_IUSrequestedSampleRows_;
+    Int64* PST_IUSactualSampleRows_;
+
     template <class T>
     Int32 processIUSColumn(T* ptr,
                            const NAWchar* format,
@@ -1883,6 +1919,36 @@ private:
                        HSColGroupStruct* insGroup, Int64 insrows);
 
     template <class T>
+    class HSHiLowValues
+      {
+        public:
+
+          NABoolean seenAtLeastOneValue_;  // initially FALSE
+          // the next two are valid only if seenAtLeastOneValue_ is TRUE
+          T hiValue_;  // highest value seen so far
+          T lowValue_; // lowest value seen so far
+
+          HSHiLowValues() : seenAtLeastOneValue_(FALSE) { };
+
+          void findHiLowValues(T& val)
+            {
+              if (seenAtLeastOneValue_)
+                {
+                  if (val < lowValue_)
+                    lowValue_ = val;
+                  else if (val > hiValue_)
+                    hiValue_ = val;
+                }
+              else
+                {
+                  seenAtLeastOneValue_ = TRUE;
+                  lowValue_ = val;
+                  hiValue_ = val;
+                } 
+            };
+      };
+
+    template <class T>
     Int16 findInterval(Int16 numInt, T* boundaries, T& val)
       {
         Int16 low = 1;
@@ -1916,6 +1982,13 @@ private:
     Int64 stmtStartTime;
     NABoolean jitLogOn;
 
+    // For IUS, was the SB_PERSISTENT_SAMPLES row for the source table updated?
+    // The change is undone by the HSGlobalsClass dtor, so we need to account for
+    // the possibility that an IUS statement failed prior to making the change.
+    // Otherwise, a concurrent IUS operation could have its changes to the row
+    // overwritten.
+    NABoolean PSRowUpdated;
+
     static THREAD_P NABoolean performISForMC_;
 
   };  // class HSGlobalsClass
@@ -2165,6 +2238,8 @@ public:
     void setHasNull(NABoolean val) { hasNull_ = val; }
     void setIntBoundary(const Lng32 intNum, const char* value, Int16 len)
       { intArry_[intNum].boundary_.copyFrom(value, len, TRUE); }
+    void setIntBoundary(const Lng32 intNum, const HSDataBuffer & newBoundary)
+      { intArry_[intNum].boundary_ = newBoundary; }
     void setIntMFVValue(const Lng32 intNum, const char* value, Int16 len)
       { intArry_[intNum].mostFreqVal_.copyFrom(value, len, TRUE); }
 
@@ -2449,11 +2524,6 @@ class HSInMemoryTable : public NABasicObject
     void generateSelectDQuery(NAString& smplTable, NAString& queryTex);
     void generateSelectIQuery(NAString& smplTable, NAString& queryText);
 
-    void generateDeleteQuery(NAString& smplTable, NAString& queryText);
-
-    void generateSelectInsertQuery(NAString& smplTable, NAString& sourceTable,
-                                   NAString& queryText);
-
 
     // method for algorithm 1
     void generateDeleteQuery(NAString& smplTable, NAString& queryText, NABoolean rollback);
@@ -2462,6 +2532,16 @@ class HSInMemoryTable : public NABasicObject
    
     Lng32 populate(NAString& queryText);
 
+    // The data is actually deallocated by calling freeISMemory() from
+    // HSGlobalsClass::incrementHistograms() for each column as soon as the
+    // column is successfully handled by IUS (the data is preserved for use
+    // by RUS/IS if IUS can't be performed). This function just resets the
+    // flag that would cause assertion failure when populate() is called, as
+    // it must be to load data for the next batch of IUS columns.
+    void depopulate() {
+      isPopulated_ = FALSE;
+    }
+
     void logState(const char* title);
 
   private:

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_la.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_la.cpp b/core/sql/ustat/hs_la.cpp
index ea00289..068aef1 100644
--- a/core/sql/ustat/hs_la.cpp
+++ b/core/sql/ustat/hs_la.cpp
@@ -952,6 +952,18 @@ NABoolean HSHiveTableDef::objExists(NABoolean createExternalTable)
   return TRUE;
 }
 
+NAString HSHiveTableDef::getNodeName() const
+  {
+    HS_ASSERT(FALSE);  // MP only
+    return "";
+  }
+
+NAString HSHiveTableDef::getCatalogLoc(formatType format) const
+  {
+    HS_ASSERT(FALSE);  // MP only
+    return "";
+  }
+
 NAString HSHiveTableDef::getHistLoc(formatType format) const
 {
   return HIVE_STATS_CATALOG "." HIVE_STATS_SCHEMA;
@@ -1020,6 +1032,18 @@ Lng32 HSHiveTableDef::DescribeColumnNames()
 
 //=====================================================
 //
+NAString HSHbaseTableDef::getNodeName() const
+  {
+    HS_ASSERT(FALSE);  // MP only
+    return "";
+  }
+
+NAString HSHbaseTableDef::getCatalogLoc(formatType format) const
+  {
+    HS_ASSERT(FALSE);  // MP only
+    return "";
+  }
+
 NAString HSHbaseTableDef::getHistLoc(formatType format) const
 {
   if ( HSGlobalsClass::isNativeHbaseCat(getCatName(format))) {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_la.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_la.h b/core/sql/ustat/hs_la.h
index 3344feb..158b905 100644
--- a/core/sql/ustat/hs_la.h
+++ b/core/sql/ustat/hs_la.h
@@ -237,16 +237,8 @@ class HSHiveTableDef : public HSTableDef
       {
         return FALSE;
       };
-    NAString getNodeName() const
-      {
-        HS_ASSERT(FALSE);  // MP only
-        return "";
-      }
-    NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const
-      {
-        HS_ASSERT(FALSE);  // MP only
-        return "";
-      }
+    NAString getNodeName() const;
+    NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const;
     NAString getHistLoc(formatType format = INTERNAL_FORMAT) const;
     Lng32 getFileType()  const
       {
@@ -348,16 +340,8 @@ class HSHbaseTableDef : public HSTableDef
       {
         return FALSE;
       };
-    NAString getNodeName() const
-      {
-        HS_ASSERT(FALSE);  // MP only
-        return "";
-      }
-    NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const
-      {
-        HS_ASSERT(FALSE);  // MP only
-        return "";
-      }
+    NAString getNodeName() const;
+    NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const;
     NAString getHistLoc(formatType format = INTERNAL_FORMAT) const;
 
     Lng32 getFileType()  const

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_log.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_log.h b/core/sql/ustat/hs_log.h
index da846dc..18e6ec5 100644
--- a/core/sql/ustat/hs_log.h
+++ b/core/sql/ustat/hs_log.h
@@ -135,21 +135,16 @@ private:
 // Log the location of the error.
 void HSFuncLogError(Lng32 error, char *filename, Lng32 lineno);
 
-// Wrapper to handle assertion failure.
+// Wrapper to handle assertion failure. Do not assert a condition with any
+// side effects, as it is evaluated a second time if false.
 #define HS_ASSERT(b)                                        \
+      {                                                     \
         if (NOT (b))                                        \
           {                                                 \
-            HSTranMan *TM = HSTranMan::Instance();          \
-            HSLogMan *LM = HSLogMan::Instance();            \
-            if (LM->LogNeeded())                            \
-              {                                             \
-                sprintf(LM->msg, "***[ERROR] INTERNAL ASSERTION (%s) AT %s:%i", "" # b "", __FILE__, __LINE__); \
-                LM->Log(LM->msg);                           \
-              }                                             \
-            if (TM->StartedTransaction())                   \
-              TM->Rollback();                               \
+            GetHSContext()->preAssertionFailure("" # b "", __FILE__, __LINE__); \
             CMPASSERT(b);                                   \
-          }
+          }                                                 \
+      }
  
 //Ignore the following WARNINGS
 //    [6008] missing single-column histograms
@@ -157,11 +152,13 @@ void HSFuncLogError(Lng32 error, char *filename, Lng32 lineno);
 //    [4030] non-standard DATETIME format
 //    [4]    internal Warning
 #define HSFilterWarning(retcode) \
+        { \
           if ((retcode == 6008) || \
               (retcode == 6007) || \
               (retcode == 4030) || \
               (retcode == HS_WARNING)) \
-            retcode = 0;
+            retcode = 0; \
+        }
 
 // Map any error (<0) code other than HS_PKEY_FLOAT_ERROR to -1.
 #define  HSFilterError(retcode) \

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_parser.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_parser.cpp b/core/sql/ustat/hs_parser.cpp
index 11adbc1..d96acc7 100644
--- a/core/sql/ustat/hs_parser.cpp
+++ b/core/sql/ustat/hs_parser.cpp
@@ -462,15 +462,19 @@ Lng32 AddTableName( const hs_table_type type
 
         *hs_globals->hsintval_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE);
 
+        *hs_globals->hsperssamp_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE);
+
         NABoolean isHbaseOrHive = HSGlobalsClass::isHbaseCat(catName) ||
                                   HSGlobalsClass::isHiveCat(catName);
 
         if (isHbaseOrHive) {
           hs_globals->hstogram_table->append(".").append(HBASE_HIST_NAME);
           hs_globals->hsintval_table->append(".").append(HBASE_HISTINT_NAME);
+          hs_globals->hsperssamp_table->append(".").append(HBASE_PERS_SAMP_NAME);
         } else {
           hs_globals->hstogram_table->append(".HISTOGRAMS");
           hs_globals->hsintval_table->append(".HISTOGRAM_INTERVALS");
+          hs_globals->hsperssamp_table->append(".PERSISTENT_SAMPLES");
         }
       }
     else

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_update.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_update.cpp b/core/sql/ustat/hs_update.cpp
index e8ca28e..4dd70d6 100644
--- a/core/sql/ustat/hs_update.cpp
+++ b/core/sql/ustat/hs_update.cpp
@@ -501,9 +501,6 @@ Lng32 UpdateStats(char *input, NABoolean requestedByCompiler)
 
     HSClearCLIDiagnostics();
 
-    if ( hs_globals_obj.canDoIUS() )
-      hs_globals_obj.end_IUS_work();
-
     hs_globals_y = NULL;
 
     // Remove IUS persistent sample if necessary.

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_yacc.y
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_yacc.y b/core/sql/ustat/hs_yacc.y
index 0007659..0ee1ca4 100644
--- a/core/sql/ustat/hs_yacc.y
+++ b/core/sql/ustat/hs_yacc.y
@@ -353,19 +353,18 @@ histogram_options : CLEAR
                       {
                         hs_globals_y->optFlags |= CLEAR_OPT;
                       }
-                 |  CREATE SAMPLE sample_clause_for_createremove
+                 |  CREATE SAMPLE random_clause
                       {
-                        if (hs_globals_y->optFlags & SAMPLE_ALL)
+                        if (hs_globals_y->optFlags & SAMPLE_RAND_2)
                           {
-                            HSFuncMergeDiags(- UERR_INVALID_OPTION,
-                                             "ALL",
-                                             "RANDOM X PERCENT or r ROWS");
+                            HSFuncMergeDiags(-UERR_IUS_WRONG_RANDOM);
                             hs_globals_y->parserError = HSGlobalsClass::ERROR_SEMANTICS;
                             return -1;
                           }
+
                         hs_globals_y->optFlags |= CREATE_SAMPLE_OPT;
                       }
-                 |  REMOVE SAMPLE sample_clause_for_createremove
+                 |  REMOVE SAMPLE
                       {
                         hs_globals_y->optFlags |= REMOVE_SAMPLE_OPT;
                       }
@@ -402,37 +401,6 @@ histogram_options : CLEAR
                  | /* empty */
 ;
 
-sample_clause_for_createremove: ALL
-                      { 
-                        hs_globals_y->optFlags |= SAMPLE_ALL;
-                      }
-                 |  int_number ROWS
-                      {
-                        if ($1 <= 0)
-                          {
-                            HSFuncMergeDiags(- UERR_INVALID_OPTION,
-                                             "SAMPLE ROWS",
-                                             "an integer greater than or equal to 0 and within limits");
-                            hs_globals_y->parserError = HSGlobalsClass::ERROR_SEMANTICS;
-                            return -1;
-                          }
-                        hs_globals_y->optFlags |= SAMPLE_BASIC_1;
-                        hs_globals_y->sampleValue1 = $1;
-                      }
-                 |  RANDOM real_number TOK_PERCENT
-                      {
-                        if ($2 <= 0 || $2 >= 100)
-                          {
-                            HSFuncMergeDiags(- UERR_INVALID_OPTION,
-                                             "PERCENT",
-                                             "a value between 0 and 100");
-                            hs_globals_y->parserError = HSGlobalsClass::ERROR_SEMANTICS;
-                            return -1;
-                          }
-                        hs_globals_y->optFlags |= SAMPLE_RAND_1;
-                        hs_globals_y->sampleValue1 = (Int64)($2 * HS_SAMP_PCNT_UPSCALE);
-                      }
-;
 
 on_clause :         ON predefined_groups
                  |  ON predefined_groups ',' regular_group_list
@@ -463,7 +431,10 @@ incremental_clause :   INCREMENTAL WHERE WHERE_CONDITION
                        if (LM->LogNeeded() )
                          LM->Log("incremental clause identified");
                            
-                         
+                       if (CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS) == DF_OFF) {
+                         HSFuncMergeDiags(-UERR_IUS_IS_DISABLED);
+                       }
+                       
                        if (CmpCommon::getDefault(USTAT_IUS_SIMPLE_SYNTAX) == DF_ON) {
                          // Via grammar, the incremental clause can only used without the
                          // on_clause. So it is safe to set the IUS_OPT flag.
@@ -585,17 +556,17 @@ sample_clause_body : SAMPLE
                         if (LM->LogNeeded() )
                           LM->Log("Creation of persistent sample table for IUS requested");
 
-                        if (hs_globals_y->optFlags & (REG_GROUP_OPT | EVERYCOL_OPT | EVERYKEY_OPT))
-                          HSFuncMergeDiags(-UERR_WRONG_ON_CLAUSE_FOR_IUS, "PERSISTENT");
-                        else if (hs_globals_y->optFlags & SAMPLE_RAND_2)
+                        // Formerly there was code here to limit PERSISTENT to 
+                        // ON EXISTING COLUMNS and ON NECESSARY COLUMNS (EXISTING_OPT and
+                        // NECESSARY_OPT in the optFlags). But there doesn't seem to be
+                        // a compelling reason for this limitation. The persistent sample
+                        // table will have all the columns of the base table regardless
+                        // of the ON clause.
+
+                        if (hs_globals_y->optFlags & SAMPLE_RAND_2)
                           HSFuncMergeDiags(-UERR_IUS_WRONG_RANDOM);
                         else
-                          {
-                            // This assert is here to make sure we covered all other possible ON
-                            // clauses in the check above.
-                            HS_ASSERT(hs_globals_y->optFlags & (EXISTING_OPT | NECESSARY_OPT));
-                            hs_globals_y->optFlags |= IUS_PERSIST;
-                          }
+                          hs_globals_y->optFlags |= IUS_PERSIST;
                       }
                  |  SAMPLE random_clause rowcount_clause
                  |  SAMPLE periodic_clause


Mime
View raw message