trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dbirds...@apache.org
Subject [1/3] incubator-trafodion git commit: [TRAFODION-2251] Fix upd stats issues with long char/varchar columns
Date Mon, 10 Oct 2016 16:29:49 GMT
Repository: incubator-trafodion
Updated Branches:
  refs/heads/master be54167b8 -> 12f602cab


[TRAFODION-2251] Fix upd stats issues with long char/varchar columns


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/8a7fe537
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/8a7fe537
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/8a7fe537

Branch: refs/heads/master
Commit: 8a7fe53736fb5720199e23864910df3cb092155d
Parents: 1e94882
Author: Dave Birdsall <dbirdsall@apache.org>
Authored: Tue Oct 4 21:35:42 2016 +0000
Committer: Dave Birdsall <dbirdsall@apache.org>
Committed: Tue Oct 4 21:35:42 2016 +0000

----------------------------------------------------------------------
 core/sql/executor/ExExeUtilLoad.cpp |   3 +-
 core/sql/ustat/hs_cli.cpp           | 103 ++++++++++++++++++++++++++-----
 core/sql/ustat/hs_cli.h             |   4 ++
 core/sql/ustat/hs_globals.cpp       |  19 +++++-
 core/sql/ustat/hs_globals.h         |  12 ++++
 core/sql/ustat/hs_log.h             |   2 +
 core/sql/ustat/hs_parser.cpp        |   7 +++
 7 files changed, 133 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/executor/ExExeUtilLoad.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExExeUtilLoad.cpp b/core/sql/executor/ExExeUtilLoad.cpp
index 08b13db..d11f182 100644
--- a/core/sql/executor/ExExeUtilLoad.cpp
+++ b/core/sql/executor/ExExeUtilLoad.cpp
@@ -132,7 +132,8 @@ short ExExeUtilCreateTableAsTcb::work()
 	  {
 	    NABoolean xnAlreadyStarted = ta->xnInProgress();
 
-	    if (xnAlreadyStarted)
+	    // allow a user transaction if NO LOAD was specified
+	    if (xnAlreadyStarted && !ctaTdb().noLoad())
               {
                 *getDiagsArea() << DgSqlCode(-20123)
                                 << DgString0("This DDL operation");

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/ustat/hs_cli.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_cli.cpp b/core/sql/ustat/hs_cli.cpp
index 1260566..878e5a2 100644
--- a/core/sql/ustat/hs_cli.cpp
+++ b/core/sql/ustat/hs_cli.cpp
@@ -504,6 +504,8 @@ Lng32 HSSample::create(NAString& tblName, NABoolean unpartitioned,
NABoolean isP
     NAString tempTabName = tblName;
     NAString userTabName = objDef->getObjectFullName();
 
+    HSGlobalsClass *hs_globals = GetHSContext();
+
     // If the table is a native one, convert the fully qualified user table name NT
     // to a fully qualified external table name ET. The sample table will be created
     // like ET.
@@ -542,21 +544,37 @@ Lng32 HSSample::create(NAString& tblName, NABoolean unpartitioned,
NABoolean isP
 
         ddl  = "CREATE TABLE ";
         ddl += tempTabName;
-        ddl += " LIKE ";
-
-        // is this an MV LOG table?
-        if (objDef->getNameSpace() == COM_IUD_LOG_TABLE_NAME)
-        {
-          ddl += "TABLE (IUD_LOG_TABLE ";
-          ddl += userTabName;
-          ddl += ") ";
-        }
+        if (hs_globals->hasOversizedColumns)
+          {
+            // Use CREATE TABLE AS SELECT when we have to modify the column lengths
+            // (this happens for tables having very long chars/varchars). One 
+            // peculiarity: We have to use the native version of the name
+            // (e.g. HIVE.whatever.whatever for Hive tables) instead of the external
+            // table name (e.g. TRAFODION._HV_whatever.whatever) in the SELECT.
+            ddl += " NO LOAD AS SELECT ";
+            addTruncatedSelectList(ddl);
+            ddl += " FROM ";
+            ddl += objDef->getObjectFullName().data();  // e.g. HIVE.whatever.whatever
+            // ddl += tableOptions;  unfortunately not supported with CREATE TABLE AS SELECT
+          }
         else
-        {
-          ddl += userTabName;
-        }
+          {
+            ddl += " LIKE ";
 
-        ddl += tableOptions;
+            // is this an MV LOG table?
+            if (objDef->getNameSpace() == COM_IUD_LOG_TABLE_NAME)
+              {
+                ddl += "TABLE (IUD_LOG_TABLE ";
+                ddl += userTabName;
+                ddl += ") ";
+              }
+            else
+              {
+                ddl += userTabName;
+              }
+
+            ddl += tableOptions;    
+          }
         tableType = ANSI_TABLE;
         sampleName = new(STMTHEAP) ComObjectName(tempTabName,
                                                  COM_UNKNOWN_NAME,
@@ -648,7 +666,6 @@ Lng32 HSSample::create(NAString& tblName, NABoolean unpartitioned,
NABoolean isP
         HSHandleError(retcode);
       }
 
-    HSGlobalsClass *hs_globals = GetHSContext();
     if (hs_globals && hs_globals->diagsArea.getNumber(DgSqlCode::ERROR_))
       hs_globals->diagsArea.deleteError(0);
 
@@ -5411,6 +5428,64 @@ NAString HSSample::getTempTablePartitionInfo(NABoolean unpartitionedSample,
   }
 
 
+//
+// METHOD:  addTruncatedSelectList()
+//
+// PURPOSE: Generates a SELECT list consisting of 
+//          column references or a SUBSTRING
+//          on column references which truncates the
+//          column to the maximum length allowed in
+//          UPDATE STATISTICS.
+//
+// INPUT:   'qry' - the SQL query string to append the 
+//          select list to.
+//
+void HSSample::addTruncatedSelectList(NAString & qry)
+  {
+    for (Lng32 i = 0; i < objDef->getNumCols(); i++)
+      {
+        if (i)
+          qry += ", ";
+
+        addTruncatedColumnReference(qry,objDef->getColInfo(i));
+      }
+  }
+
+
+//
+// METHOD:  addTruncatedColumnReference()
+//
+// PURPOSE: Generates a column reference or a SUBSTRING
+//          on a column reference which truncates the
+//          column to the maximum length allowed in
+//          UPDATE STATISTICS.
+//
+// INPUT:   'qry' - the SQL query string to append the 
+//          reference to.
+//          'colInfo' - struct containing datatype info
+//          about the column.
+//
+void HSSample::addTruncatedColumnReference(NAString & qry,HSColumnStruct & colInfo)
+  {
+    Lng32 maxLengthInBytes = MAX_SUPPORTED_CHAR_LENGTH;
+    bool isOverSized = DFS2REC::isAnyCharacter(colInfo.datatype) &&
+                           (colInfo.length > maxLengthInBytes);
+    if (isOverSized)
+      {
+        qry += "SUBSTRING(";
+        qry += colInfo.colname->data();
+        qry += " FOR ";
+        
+        char temp[20];  // big enough for "nnnnnn) AS "
+        sprintf(temp,"%d) AS ", maxLengthInBytes / CharInfo::maxBytesPerChar(colInfo.charset));
+        qry += temp;
+        qry += colInfo.colname->data();
+      }
+    else
+      qry += colInfo.colname->data();
+  }
+
+
 // Print the heading for the display of a query plan to the log.
 void printPlanHeader(HSLogMan *LM)
   {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/ustat/hs_cli.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_cli.h b/core/sql/ustat/hs_cli.h
index 3d91ac4..e30a102 100644
--- a/core/sql/ustat/hs_cli.h
+++ b/core/sql/ustat/hs_cli.h
@@ -57,6 +57,7 @@ class HSDataBuffer;
 class HSTableDef;
 struct HSColDesc;
 struct HSColGroupStruct;
+struct HSColumnStruct;
 class ISFixedChar;
 class ISVarChar;
 class MCWrapper;
@@ -193,6 +194,9 @@ class HSSample
 
     NABoolean isIUS() { return isIUS_; }
 
+    void addTruncatedSelectList(NAString & qry);
+    static void addTruncatedColumnReference(NAString & qry, HSColumnStruct & colInfo);
+
   private:
     // Member function
     void makeTableName(NABoolean isPersSample = FALSE);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/ustat/hs_globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index 70adda5..00a15db 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -2841,6 +2841,7 @@ HSGlobalsClass::HSGlobalsClass(ComDiagsArea &diags)
   : catSch(new(STMTHEAP) NAString(STMTHEAP)),
     isHbaseTable(FALSE),
     isHiveTable(FALSE),
+    hasOversizedColumns(FALSE),
     user_table(new(STMTHEAP) NAString(STMTHEAP)),
     numPartitions(0),
     hstogram_table(new(STMTHEAP) NAString(STMTHEAP)),
@@ -4001,7 +4002,18 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
 
     dml  = insertType;
     dml += sampleTable;
-    dml += " SELECT * FROM ";
+    dml += " SELECT ";
+    if (hs_globals->hasOversizedColumns)
+      {
+        // The source table has an oversized column. We have to generate
+        // SUBSTRING calls on such columns to fit them into the sample
+        // table.
+        addTruncatedSelectList(dml);
+      }      
+    else
+      dml += "*";
+
+    dml += " FROM ";
 
     NAString hiveSrc = CmpCommon::getDefaultString(USE_HIVE_SOURCE);
     if (! hiveSrc.isNull())
@@ -4874,9 +4886,12 @@ static void mapInternalSortTypes(HSColGroupStruct *groupList, NABoolean
forHive
       default:
         group->ISdatatype = col.datatype;
         group->ISlength = col.length;
+        if (group->ISlength > MAX_SUPPORTED_CHAR_LENGTH)
+          group->ISlength = MAX_SUPPORTED_CHAR_LENGTH;
         group->ISprecision = col.precision;
         group->ISscale = col.scale;
-        group->ISSelectExpn.append(columnName);
+        // the method below handles adding SUBSTRING for over-size char/varchars
+        HSSample::addTruncatedColumnReference(group->ISSelectExpn,col);
         break;
      } // switch
      group = group->next;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/ustat/hs_globals.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_globals.h b/core/sql/ustat/hs_globals.h
index 10381a8..ae47a8f 100644
--- a/core/sql/ustat/hs_globals.h
+++ b/core/sql/ustat/hs_globals.h
@@ -84,6 +84,16 @@ Lng32 setBufferValue(T& value,
                       HSDataBuffer &boundary);
 
 
+
+// This is the max supported length of character strings in UPDATE STATISTICS.
+// We will process longer columns, but we truncate their values to this length
+// during the processing. As a result, we may underestimate UEC, if, for 
+// example, the first 32767 bytes are identical but some difference occurs
+// afterwards. If we someday wish to support longer lengths, at the very least
+// the ISVarChar class needs to change to use a longer length field for varchar
+// values.
+enum { MAX_SUPPORTED_CHAR_LENGTH = 32767 };
+
 // An instance of ISFixedChar represents a value of a fixed-length character
 // string (either single or double-byte) retrieved into memory for use by
 // internal sort. A pointer to the actual string is maintained, and definitions
@@ -1583,6 +1593,8 @@ public:
     NAString      *user_table;                     /* object name             */
     NABoolean     isHbaseTable;                    /* ustat on HBase table    */
     NABoolean     isHiveTable;                     /* ustat on Hive table     */
+    NABoolean     hasOversizedColumns;             /* set to TRUE for tables  */
+                                                   /* having gigantic columns */
     ComAnsiNameSpace nameSpace;                    /* object namespace    ++MV*/
     Int64          numPartitions;                  /* # of partns in object   */
     NAString      *hstogram_table;                 /* HISTOGRM table          */

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/ustat/hs_log.h
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_log.h b/core/sql/ustat/hs_log.h
index 18e6ec5..783b96f 100644
--- a/core/sql/ustat/hs_log.h
+++ b/core/sql/ustat/hs_log.h
@@ -150,12 +150,14 @@ void HSFuncLogError(Lng32 error, char *filename, Lng32 lineno);
 //    [6008] missing single-column histograms
 //    [6007] missing multi-column histograms
 //    [4030] non-standard DATETIME format
+//    [2053] Optimizer pass two assertion failure (optimizer still attempts to produce a
plan)
 //    [4]    internal Warning
 #define HSFilterWarning(retcode) \
         { \
           if ((retcode == 6008) || \
               (retcode == 6007) || \
               (retcode == 4030) || \
+              (retcode == 2053) || \
               (retcode == HS_WARNING)) \
             retcode = 0; \
         }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8a7fe537/core/sql/ustat/hs_parser.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_parser.cpp b/core/sql/ustat/hs_parser.cpp
index d96acc7..7ec3d37 100644
--- a/core/sql/ustat/hs_parser.cpp
+++ b/core/sql/ustat/hs_parser.cpp
@@ -520,6 +520,13 @@ HSColGroupStruct* AddSingleColumn(const Lng32 colNumber, HSColGroupStruct*&
grou
     HSColGroupStruct *newGroup = new(STMTHEAP) HSColGroupStruct;
     HSColumnStruct   newColumn = HSColumnStruct(hs_globals->objDef->getColInfo(colNumber));
 
+    bool isOverSized = DFS2REC::isAnyCharacter(newColumn.datatype) &&
+              (newColumn.length > MAX_SUPPORTED_CHAR_LENGTH);
+    if (isOverSized)
+      {
+        hs_globals->hasOversizedColumns = TRUE;
+      }
+
     newColumn.colnum  = colNumber;
     newGroup->colSet.insert((const HSColumnStruct) newColumn);
     newGroup->colCount = 1;


Mime
View raw message