trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dbirds...@apache.org
Subject [3/4] incubator-trafodion git commit: [TRAFODION-2655] Fix two MDAM optimizer bugs. Update optimizer simulator.
Date Thu, 22 Jun 2017 22:40:52 GMT
[TRAFODION-2655] Fix two MDAM optimizer bugs. Update optimizer simulator.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/802029e5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/802029e5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/802029e5

Branch: refs/heads/master
Commit: 802029e583f790e7dd3ea0ecfbf3eb921dd8f073
Parents: 1083be3
Author: Dave Birdsall <dbirdsall@apache.org>
Authored: Wed Jun 21 22:58:41 2017 +0000
Committer: Dave Birdsall <dbirdsall@apache.org>
Committed: Wed Jun 21 22:58:41 2017 +0000

----------------------------------------------------------------------
 core/sql/arkcmp/CmpContext.cpp            |    2 +
 core/sql/arkcmp/CmpContext.h              |    7 +
 core/sql/cli/Context.h                    |    3 +
 core/sql/export/NAStringDef.cpp           |   14 +
 core/sql/export/NAStringDef.h             |    8 +-
 core/sql/optimizer/HDFSHook.cpp           |   58 +-
 core/sql/optimizer/HDFSHook.h             |   78 +-
 core/sql/optimizer/NAClusterInfo.cpp      |  265 +--
 core/sql/optimizer/NAClusterInfo.h        |   12 +-
 core/sql/optimizer/NodeMap.cpp            |   12 +-
 core/sql/optimizer/ObjectNames.cpp        |   17 +
 core/sql/optimizer/ObjectNames.h          |    3 +
 core/sql/optimizer/OptPhysRelExpr.cpp     |    2 +-
 core/sql/optimizer/OptimizerSimulator.cpp | 2322 +++++++++++++++++-------
 core/sql/optimizer/OptimizerSimulator.h   |  257 ++-
 core/sql/optimizer/ScanOptimizer.cpp      |   22 +-
 core/sql/optimizer/ScmCostMethod.cpp      |    2 +-
 core/sql/sqlcomp/DefaultConstants.h       |    3 +
 core/sql/sqlcomp/nadefaults.cpp           |   23 +-
 19 files changed, 2076 insertions(+), 1034 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/arkcmp/CmpContext.cpp
----------------------------------------------------------------------
diff --git a/core/sql/arkcmp/CmpContext.cpp b/core/sql/arkcmp/CmpContext.cpp
index 7ca7d32..a2cdeb9 100644
--- a/core/sql/arkcmp/CmpContext.cpp
+++ b/core/sql/arkcmp/CmpContext.cpp
@@ -131,6 +131,8 @@ CmpContext::CmpContext(UInt32 f, CollHeap * h)
   sqlTextBuf_(NULL),
   uninitializedSeabaseErrNum_(0),
   hbaseErrNum_(0),
+  numSQNodes_(0),
+  hasVirtualSQNodes_(FALSE),
   trafMDDescsInfo_(NULL),
   transMode_(TransMode::IL_NOT_SPECIFIED_,    // init'd below
              TransMode::READ_WRITE_,

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/arkcmp/CmpContext.h
----------------------------------------------------------------------
diff --git a/core/sql/arkcmp/CmpContext.h b/core/sql/arkcmp/CmpContext.h
index 144d18b..dbe970d 100644
--- a/core/sql/arkcmp/CmpContext.h
+++ b/core/sql/arkcmp/CmpContext.h
@@ -395,7 +395,12 @@ public :
   void setArkcmpEnvDirect(const char *name, const char *value,
                           NABoolean unset);
 
+  // variables managed by the HHDFSMasterHostList class
   ARRAY(const char *) *getHosts() { return &hosts_; }
+  CollIndex getNumSQNodes() { return numSQNodes_; }
+  void setNumSQNodes(CollIndex n) { numSQNodes_ = n; }
+  NABoolean getHasVirtualSQNodes() { return hasVirtualSQNodes_; }
+  void setHasVirtualSQNodes(NABoolean v) { hasVirtualSQNodes_ = v; }
 #endif // NA_CMPDLL
 
   // used by sendAllControlsAndFlags() and restoreAllControlsAndFlags()
@@ -585,6 +590,8 @@ private:
   Lng32 hbaseErrNum_; 
   NAString hbaseErrStr_;
   ARRAY(const char *) hosts_;
+  CollIndex numSQNodes_;
+  NABoolean hasVirtualSQNodes_;
   NAClusterInfo *clusterInfo_;
   RuleSet *ruleSet_;
   OptDebug *optDbg_;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/cli/Context.h
----------------------------------------------------------------------
diff --git a/core/sql/cli/Context.h b/core/sql/cli/Context.h
index 0489acf..d1cce8d 100644
--- a/core/sql/cli/Context.h
+++ b/core/sql/cli/Context.h
@@ -207,6 +207,9 @@ public:
   //expose cmpContextInfo_ to get HQC info of different contexts
   const NAArray<CmpContextInfo *> & getCmpContextInfo() const { return cmpContextInfo_;
}
 
+  //expose cmpContext stack to allow search
+  const LIST(CmpContext *)& getCmpContextsInUse() const { return  cmpContextInUse_; }
+
   CollIndex addTrustedRoutine(LmRoutine *r);
   LmRoutine *findTrustedRoutine(CollIndex ix);
   void putTrustedRoutine(CollIndex ix);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/export/NAStringDef.cpp
----------------------------------------------------------------------
diff --git a/core/sql/export/NAStringDef.cpp b/core/sql/export/NAStringDef.cpp
index 94ef1d6..b9d0960 100644
--- a/core/sql/export/NAStringDef.cpp
+++ b/core/sql/export/NAStringDef.cpp
@@ -239,6 +239,19 @@ NAString::copy() const
   return temp;
 }
 
+int
+NAString::extract(int begin, int end, NAString & target) const
+{
+  if(end >= length() || begin < 0 ||end < begin) 
+      return -1;
+
+  for(int i = begin; i <= end; i++)
+      target.append((*this)[i]);
+
+  return end - begin;
+}
+
+
 UInt32 
 NAString::hashFoldCase() const
 {
@@ -424,6 +437,7 @@ char* NAString::buildBuffer(const char* formatTemplate, va_list args)
   int bufferSize = 20049;
   int msgSize = 0;
   //buffer is managed by this static function
+  //the allocated memory is shared by all NAString objects.
   static THREAD_P char *buffer = NULL;
   va_list args2;
   va_copy(args2, args);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/export/NAStringDef.h
----------------------------------------------------------------------
diff --git a/core/sql/export/NAStringDef.h b/core/sql/export/NAStringDef.h
index e46b6e1..c5e1124 100644
--- a/core/sql/export/NAStringDef.h
+++ b/core/sql/export/NAStringDef.h
@@ -379,6 +379,8 @@ public:
   NABoolean     contains(const NAString& pat, caseCompare cmp = exact) const;
 
   NAString      copy() const;
+  //Extract part of this string and append it to target
+  int extract(int begin, int end, NAString & target) const;
   inline const char*   toCharStar() const {return fbstring_.data();} ;
   const char*   data() const {return fbstring_.data();}
   size_t        first(char c) const { return fbstring_.find_first_of(c);}
@@ -393,6 +395,10 @@ public:
   UInt32      hash() const;
   UInt32      hashFoldCase() const;
   void        mash(UInt32& hash, UInt32 chars) const;
+
+  // index methods return:
+  //   NA_NPOS if not found.
+  //   0 based index, if found.
   size_t        index(const char* pat, size_t i=0, caseCompare cmp = exact)
                       const;
   
@@ -477,7 +483,7 @@ protected:
   void                  cow(size_t nc);                 // Do copy on write as needed
   
   void                  initChar(char, NAMemory *h);    // Initialize from char
-
+public:
   static char* buildBuffer(const char* formatTemplate, va_list args);
   
 private:

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/HDFSHook.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/HDFSHook.cpp b/core/sql/optimizer/HDFSHook.cpp
index 1a1cac4..9903be3 100644
--- a/core/sql/optimizer/HDFSHook.cpp
+++ b/core/sql/optimizer/HDFSHook.cpp
@@ -489,7 +489,7 @@ void HHDFSBucketStats::addFile(hdfsFS fs, hdfsFileInfo *fileInfo,
                                char recordTerminator,
                                CollIndex pos)
 {
-  HHDFSFileStats *fileStats = new(heap_) HHDFSFileStats(heap_);
+  HHDFSFileStats *fileStats = new(heap_) HHDFSFileStats(heap_, getTable());
 
   if ( scount_ > 10 )
     doEstimate = FALSE;
@@ -527,6 +527,19 @@ void HHDFSBucketStats::print(FILE *ofd)
   HHDFSStatsBase::print(ofd, "bucket");
 }
 
+OsimHHDFSStatsBase* HHDFSBucketStats::osimSnapShot(NAMemory * heap)
+{
+    OsimHHDFSBucketStats* stats = new(heap) OsimHHDFSBucketStats(NULL, this, heap);
+    
+    for(Int32 i = 0; i < fileStatsList_.getUsedLength(); i++){
+            //"gaps" are not added, but record the position
+            if(fileStatsList_.getUsage(i) != UNUSED_COLL_ENTRY)
+                stats->addEntry(fileStatsList_[i]->osimSnapShot(heap), i);
+    }
+    return stats;
+}
+
+
 HHDFSListPartitionStats::~HHDFSListPartitionStats()
 {
   for (CollIndex b=0; b<=defaultBucketIdx_; b++)
@@ -580,7 +593,7 @@ void HHDFSListPartitionStats::populate(hdfsFS fs,
 
             if (! bucketStatsList_.used(bucketNum))
               {
-                bucketStats = new(heap_) HHDFSBucketStats(heap_);
+                bucketStats = new(heap_) HHDFSBucketStats(heap_, getTable());
                 bucketStatsList_.insertAt(bucketNum, bucketStats);
               }
             else
@@ -639,7 +652,7 @@ NABoolean HHDFSListPartitionStats::validateAndRefresh(hdfsFS fs, HHDFSDiags
&dia
                 // first file for a new bucket got added
                 if (!refresh)
                   return FALSE;
-                bucketStats = new(heap_) HHDFSBucketStats(heap_);
+                bucketStats = new(heap_) HHDFSBucketStats(heap_, getTable());
                 bucketStatsList_.insertAt(bucketNum, bucketStats);
               }
             else
@@ -798,6 +811,19 @@ void HHDFSListPartitionStats::print(FILE *ofd)
   HHDFSStatsBase::print(ofd, "partition");
 }
 
+OsimHHDFSStatsBase* HHDFSListPartitionStats::osimSnapShot(NAMemory * heap)
+{
+    OsimHHDFSListPartitionStats* stats = new(heap) OsimHHDFSListPartitionStats(NULL, this,
heap);
+
+    for(Int32 i = 0; i < bucketStatsList_.getUsedLength(); i++)
+    {
+        //"gaps" are not added, but record the position
+        if(bucketStatsList_.getUsage(i) != UNUSED_COLL_ENTRY)
+            stats->addEntry(bucketStatsList_[i]->osimSnapShot(heap), i);
+    }
+    return stats;
+}
+
 HHDFSTableStats::~HHDFSTableStats()
 {
   for (int p=0; p<totalNumPartitions_; p++)
@@ -1017,7 +1043,8 @@ NABoolean HHDFSTableStats::splitLocation(const char *tableLocation,
 void HHDFSTableStats::processDirectory(const NAString &dir, Int32 numOfBuckets, 
                                        NABoolean doEstimate, char recordTerminator)
 {
-  HHDFSListPartitionStats *partStats = new(heap_) HHDFSListPartitionStats(heap_);
+  HHDFSListPartitionStats *partStats = new(heap_)
+    HHDFSListPartitionStats(heap_, this);
   partStats->populate(fs_, dir, numOfBuckets, diags_, doEstimate, recordTerminator);
 
   if (diags_.isSuccess())
@@ -1118,3 +1145,26 @@ void HHDFSTableStats::disconnectHDFS()
   // is dropped or the thread exits.
 }
 
+
+OsimHHDFSStatsBase* HHDFSTableStats::osimSnapShot(NAMemory * heap)
+{
+    OsimHHDFSTableStats* stats = new(heap) OsimHHDFSTableStats(NULL, this, heap);
+
+    for(Int32 i = 0; i < listPartitionStatsList_.getUsedLength(); i++)
+    {
+        //"gaps" are not added, but record the position
+        if(listPartitionStatsList_.getUsage(i) != UNUSED_COLL_ENTRY)
+            stats->addEntry(listPartitionStatsList_[i]->osimSnapShot(heap), i);
+    }
+    return stats;
+}
+
+OsimHHDFSStatsBase* HHDFSFileStats::osimSnapShot(NAMemory * heap)
+{
+    OsimHHDFSFileStats* stats = new(heap) OsimHHDFSFileStats(NULL, this, heap);
+
+    return stats;
+}
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/HDFSHook.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/HDFSHook.h b/core/sql/optimizer/HDFSHook.h
index 7eef0bd..1c60d0f 100644
--- a/core/sql/optimizer/HDFSHook.h
+++ b/core/sql/optimizer/HDFSHook.h
@@ -49,6 +49,9 @@
 // forward declarations
 struct hive_tbl_desc;
 class HivePartitionAndBucketKey;
+class HHDFSTableStats;
+class OsimHHDFSStatsBase;
+class OptimizerSimulator;
 
 typedef CollIndex HostId;
 typedef Int64 BucketNum;
@@ -57,6 +60,7 @@ typedef Int64 Offset;
 
 class HHDFSMasterHostList : public NABasicObject
 {
+friend class OptimizerSimulator;
 public:
   HHDFSMasterHostList(NAMemory *heap) {}
   ~HHDFSMasterHostList();
@@ -112,13 +116,18 @@ private:
 
 class HHDFSStatsBase : public NABasicObject
 {
+  friend class OsimHHDFSStatsBase;
 public:
-  HHDFSStatsBase() : numBlocks_(0),
-                     numFiles_(0),
-                     totalSize_(0),
-                     modificationTS_(0),
-                     sampledBytes_(0),
-                     sampledRows_(0) {}
+  HHDFSStatsBase(HHDFSTableStats *table) : numBlocks_(0),
+                                           numFiles_(0),
+                                           totalRows_(0),
+                                           totalStringLengths_(0),
+                                           totalSize_(0),
+                                           numStripes_(0),
+                                           modificationTS_(0),
+                                           sampledBytes_(0),
+                                           sampledRows_(0),
+                                           table_(table) {}
 
   void add(const HHDFSStatsBase *o);
   void subtract(const HHDFSStatsBase *o);
@@ -126,6 +135,9 @@ public:
   Int64 getTotalSize() const { return totalSize_; }
   Int64 getNumFiles() const { return numFiles_; }
   Int64 getNumBlocks() const { return numBlocks_; }
+  Int64 getTotalRows() const { return totalRows_; }
+  Int64 getTotalStringLengths() { return totalStringLengths_; }
+  Int64 getNumStripes() const { return numStripes_; }
   Int64 getSampledBytes() const { return sampledBytes_; }
   Int64 getSampledRows() const { return sampledRows_; }
   time_t getModificationTS() const { return modificationTS_; }
@@ -133,22 +145,34 @@ public:
   Int64 getEstimatedRowCount() const;
   Int64 getEstimatedRecordLength() const;
   void print(FILE *ofd, const char *msg);
+  const HHDFSTableStats *getTable() const { return table_; }
+  HHDFSTableStats *getTable() { return table_; }
+  
+  virtual OsimHHDFSStatsBase* osimSnapShot(NAMemory * heap){ return NULL; }
 
 protected:
   Int64 numBlocks_;
   Int64 numFiles_;
+  Int64 totalRows_;  // for ORC files
+  Int64 numStripes_;  // for ORC files
+  Int64 totalStringLengths_;  // for ORC files
   Int64 totalSize_;
   time_t modificationTS_; // last modification time of this object (file, partition/directory,
bucket or table)
   Int64 sampledBytes_;
   Int64 sampledRows_;
+  HHDFSTableStats *table_;
 };
 
 class HHDFSFileStats : public HHDFSStatsBase
 {
+  friend class OsimHHDFSFileStats;
 public:
-  HHDFSFileStats(NAMemory *heap) : heap_(heap),
-                                   fileName_(heap),
-                                   blockHosts_(NULL) {}
+  HHDFSFileStats(NAMemory *heap,
+                 HHDFSTableStats *table) :
+       HHDFSStatsBase(table),
+       heap_(heap),
+       fileName_(heap),
+       blockHosts_(NULL) {}
   ~HHDFSFileStats();
   void populate(hdfsFS fs,
                 hdfsFileInfo *fileInfo,
@@ -162,7 +186,9 @@ public:
   Int64 getBlockSize() const                            { return blockSize_; }
   HostId getHostId(Int32 replicate, Int64 blockNum) const
                         { return blockHosts_[replicate*numBlocks_+blockNum]; }
-  void print(FILE *ofd);
+  virtual void print(FILE *ofd);
+  
+  virtual OsimHHDFSStatsBase* osimSnapShot(NAMemory * heap);
 
 private:
 
@@ -178,8 +204,12 @@ private:
 
 class HHDFSBucketStats : public HHDFSStatsBase
 {
+  friend class OsimHHDFSBucketStats;
 public:
-  HHDFSBucketStats(NAMemory *heap) : heap_(heap), fileStatsList_(heap), scount_(0) {}
+  HHDFSBucketStats(NAMemory *heap,
+                   HHDFSTableStats *table) :
+       HHDFSStatsBase(table),
+       heap_(heap), fileStatsList_(heap), scount_(0) {}
   ~HHDFSBucketStats();
 
   const CollIndex entries() const         { return fileStatsList_.entries(); }
@@ -196,6 +226,10 @@ public:
   void removeAt(CollIndex i);
   void print(FILE *ofd);
 
+  void insertAt(Int32 pos, HHDFSFileStats* st){  fileStatsList_.insertAt(pos, st);  }
+  
+  virtual OsimHHDFSStatsBase* osimSnapShot(NAMemory * heap);
+
 private:
 
   // list of files in this bucket
@@ -207,8 +241,12 @@ private:
 
 class HHDFSListPartitionStats : public HHDFSStatsBase
 {
+    friend class OsimHHDFSListPartitionStats;
 public:
-  HHDFSListPartitionStats(NAMemory *heap) : heap_(heap), partitionDir_(heap),
+  HHDFSListPartitionStats(NAMemory *heap,
+                          HHDFSTableStats *table) :
+       HHDFSStatsBase(table),
+       heap_(heap), partitionDir_(heap),
     bucketStatsList_(heap),
     doEstimation_(FALSE),
     recordTerminator_(0)
@@ -232,10 +270,16 @@ public:
   Int32 determineBucketNum(const char *fileName);
   void print(FILE *ofd);
 
+  void insertAt(Int32 pos, HHDFSBucketStats* st){  bucketStatsList_.insertAt(pos, st);  }
+  
+  virtual OsimHHDFSStatsBase* osimSnapShot(NAMemory * heap);
+
 private:
 
   // directory of the partition
   NAString partitionDir_;
+  NAString partitionKeyValues_;
+  int partIndex_; // index in HDFSTableStats list
 
   // number of buckets (from table DDL) or 0 if partition is not bucketed
   // Note this value can never be 1. This value indicates the last
@@ -258,8 +302,11 @@ private:
 class HHDFSTableStats : public HHDFSStatsBase
 {
   friend class HivePartitionAndBucketKey; // to be able to make a subarray of the partitions
+  friend class OsimHHDFSTableStats;
+  friend class OptimizerSimulator;
 public:
-  HHDFSTableStats(NAMemory *heap) : currHdfsPort_(-1),
+  HHDFSTableStats(NAMemory *heap) : HHDFSStatsBase(this),
+                                    currHdfsPort_(-1),
                                     fs_(NULL),
                                     hdfsPortOverride_(-1),
                                     tableDir_(heap),
@@ -337,6 +384,11 @@ public:
 
   const NAString &tableDir() const { return tableDir_; }
 
+  void insertAt(Int32 pos, HHDFSListPartitionStats * st) {  listPartitionStatsList_.insertAt(pos,
st);  }
+  virtual OsimHHDFSStatsBase* osimSnapShot(NAMemory * heap);
+  void captureHiveTableStats(const NAString &tableName, Int64 lastModificationTs, hive_tbl_desc*
hvt_desc );
+  static HHDFSTableStats * restoreHiveTableStats(const NAString & tableName,  Int64 lastModificationTs,
 hive_tbl_desc* hvt_desc, NAMemory* heap);
+
   const Lng32 numOfPartCols() const { return numOfPartCols_; }
   const Lng32 totalNumPartitions() const { return totalNumPartitions_; }
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/NAClusterInfo.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NAClusterInfo.cpp b/core/sql/optimizer/NAClusterInfo.cpp
index 7682d08..17d3162 100644
--- a/core/sql/optimizer/NAClusterInfo.cpp
+++ b/core/sql/optimizer/NAClusterInfo.cpp
@@ -310,7 +310,7 @@ NAClusterInfo::NAClusterInfo(CollHeap * heap)
                                                           (&intHashFunc, 101,TRUE,heap);
                                                           
       activeClusters_= NULL;
-      smpCount_ = -1;
+      physicalSMPCount_ = -1;
 
       NADefaults::getNodeAndClusterNumbers(localSMP_ , localCluster_);
 
@@ -336,7 +336,7 @@ NAClusterInfo::NAClusterInfo(CollHeap * heap)
       CMPASSERT(error == 0);
 
       maps *cpuList=new(heap) maps(heap);
-      smpCount_ = 0;
+      physicalSMPCount_ = 0;
 
       NAList<CollIndex> storageList(heap, nodeCount);
 
@@ -357,7 +357,7 @@ NAClusterInfo::NAClusterInfo(CollHeap * heap)
             cpuList->insertToAggregationNodeList(nodeInfo[i].nid);
 
           if (!nodeInfo[i].spare_node)
-             smpCount_++;
+             physicalSMPCount_++;
 
           // store nodeName-nodeId pairs
           NAString *key_nodeName = new (heap_) NAString(nodeInfo[i].node_name, heap_);
@@ -414,7 +414,7 @@ NAClusterInfo::NAClusterInfo(CollHeap * heap)
       clusterToCPUMap_ = NULL;
       nodeIdToNodeNameMap_ = NULL;
       activeClusters_= NULL;
-      smpCount_ = -1;
+      physicalSMPCount_ = -1;
       //load NAClusterInfo from OSIM file
       simulateNAClusterInfo();
       break;
@@ -468,181 +468,6 @@ NAClusterInfo::~NAClusterInfo()
 
 }
 
-//============================================================================
-// This method writes the information related to the NAClusterInfo class to a
-// logfile called "NAClusterInfo.txt".
-//============================================================================
-void NAClusterInfo::captureNAClusterInfo(ofstream & naclfile)
-{
-  CollIndex i, ci;
-  char filepath[OSIM_PATHMAX];
-  char filename[OSIM_FNAMEMAX];
-
-  // We don't capture data members that are computed during the compilation of
-  // a query. These include:
-  //
-  // * smpCount_;
-  // * tableToClusterMap_;
-  // * activeClusters_;
-  //
-
-  naclfile << "localCluster_: " << localCluster_ << endl
-           << "localSMP_: " << localSMP_ << endl;
-
-  CollIndex *key_collindex;  
-  maps *val_maps;
-  // Iterator for logging all the entries in clusterToCPUMap_ HashDictionary.
-  NAHashDictionaryIterator<CollIndex, maps> C2CPUIter (*clusterToCPUMap_, NULL, NULL);
 
-  naclfile << "clusterToCPUMap_: " << C2CPUIter.entries() << " :" <<
endl;
-  if (C2CPUIter.entries() > 0)
-  {
-    // Write the header line for the table.
-    naclfile << "  ";
-    naclfile.width(10); 
-    naclfile << "clusterNum" << "  ";
-    naclfile << "cpuList" << endl;
-    for (i=0; i<C2CPUIter.entries(); i++)
-    {
-      C2CPUIter.getNext(key_collindex, val_maps);
-      naclfile << "  ";
-      naclfile.width(10); naclfile << *key_collindex << "  ";
-                          naclfile << val_maps->list->entries() << " :
";
-      for (ci=0; ci<val_maps->list->entries(); ci++)
-      {
-        naclfile.width(3); naclfile << (*(val_maps->list))[ci] << " ";
-      }
-      naclfile << endl;
-    }
-  }
-
-  Int32 * nodeID = NULL;
-  NAString* nodeName = NULL;
-  NAHashDictionaryIterator<Int32, NAString> nodeNameAndIDIter (*nodeIdToNodeNameMap_);
-  naclfile << "nodeIdAndNodeNameMap: " << nodeNameAndIDIter.entries() <<
endl;
-  for(nodeNameAndIDIter.getNext(nodeID, nodeName); nodeID && nodeName; nodeNameAndIDIter.getNext(nodeID,
nodeName))
-  {
-      naclfile << *nodeID << " " << nodeName->data() << endl;
-  }
-
-  // Now save the OS-specific information to the NAClusterInfo.txt file
-  captureOSInfo(naclfile);
-}
-
-//============================================================================
-// This method reads the information needed for NAClusterInfo class from
-// a logfile called "NAClusterInfo.txt" and then populates the variables
-// accordingly.
-//============================================================================
-void NAClusterInfo::simulateNAClusterInfo()
-{
-  Int32 i, ci;
-  char var[256];
-
-  const char* filepath = CURRCONTEXT_OPTSIMULATOR->getLogFilePath(OptimizerSimulator::NACLUSTERINFO);
-
-  activeClusters_= NULL;
-  smpCount_ = -1;
-
-  ifstream naclfile(filepath);
-
-  if(!naclfile.good())
-  {
-    char errMsg[38+OSIM_PATHMAX+1]; // Error msg below + filename + '\0'
-    snprintf(errMsg, sizeof(errMsg), "Unable to open %s file for reading data.", filepath);
-    OsimLogException(errMsg, __FILE__, __LINE__).throwException();
-  }
-  
-  while(naclfile.good())
-  {
-    // Read the variable name from the file.
-    naclfile.getline(var, sizeof(var), ':');
-    if(!strcmp(var, "localCluster_"))
-    {
-      naclfile >> localCluster_; naclfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "localSMP_"))
-    {
-      naclfile >> localSMP_; naclfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "clusterToCPUMap_"))
-    {
-      Int32 C2CPU_entries, clusterNum, cpuList_entries, cpuNum;
-
-      clusterToCPUMap_ = new(heap_) NAHashDictionary<CollIndex,maps>(&clusterNumHashFunc,17,TRUE,
heap_);
-      naclfile >> C2CPU_entries; naclfile.ignore(OSIM_LINEMAX, '\n');
-      if(C2CPU_entries > 0)
-      {
-        // Read and ignore the header line.
-        naclfile.ignore(OSIM_LINEMAX, '\n');
-        for (i=0; i<C2CPU_entries; i++)
-        {
-          naclfile >> clusterNum;
-          naclfile >> cpuList_entries; naclfile.ignore(OSIM_LINEMAX, ':');
-          CollIndex *key_clusterNum = new(heap_) CollIndex(clusterNum);
-          maps *val_cpuList = new(heap_) maps(heap_);
-          for (ci=0; ci<cpuList_entries; ci++)
-          {
-            naclfile >> cpuNum;
-            val_cpuList->list->insert(cpuNum);
-          }
-          naclfile.ignore(OSIM_LINEMAX, '\n');
-          CollIndex *checkClusterNum = clusterToCPUMap_->insert(key_clusterNum, val_cpuList);
-          CMPASSERT(checkClusterNum);
-        }
-      }
-    }
-    else if(!strcmp(var, "nodeIdAndNodeNameMap"))
-    {
-      Int32 id_name_entries;
-      Int32 nodeId;
-      char nodeName[256];
-      nodeIdToNodeNameMap_ = new(heap_) NAHashDictionary<Int32, NAString>
-                                                          (&intHashFunc, 101,TRUE,heap_);
-                                                          
-      nodeNameToNodeIdMap_ = new(heap_) NAHashDictionary<NAString, Int32>
-                                                          (&NAString::hash, 101,TRUE,heap_);
-      naclfile >> id_name_entries;
-      naclfile.ignore(OSIM_LINEMAX, '\n');
-      for(i = 0; i < id_name_entries; i++)
-      {
-          naclfile >> nodeId >> nodeName;
-          naclfile.ignore(OSIM_LINEMAX, '\n');
-          
-          //populate clusterId<=>clusterName map from file
-          Int32 * key_nodeId = new Int32(nodeId);
-          NAString * val_nodeName = new (heap_) NAString(nodeName, heap_);
-          Int32 * retId = nodeIdToNodeNameMap_->insert(key_nodeId, val_nodeName);
-          //CMPASSERT(retId);
-          
-          NAString * key_nodeName = new (heap_) NAString(nodeName, heap_);
-          Int32 * val_nodeId = new Int32(nodeId);
-          NAString * retName = nodeNameToNodeIdMap_->insert(key_nodeName, val_nodeId);
-          //some node names are like g4t3024:0, g4t3024:1
-          //I don't know why we need to remove strings after ':' or '.' in node name,
-          //but if string after ':' or '.' is removed, same node names correspond to different
node ids,
-          //this can cause problems here
-          //CMPASSERT(retName);
-      }
-    }
-    else
-    {
-      // This variable will either be read in simulateNAClusterInfoNSK()
-      // method of NAClusterInfoNSK class or is not the one that we want
-      // to read here in this method. So discard it and continue.
-      naclfile.ignore(OSIM_LINEMAX, '\n');
-      while (naclfile.peek() == ' ')
-      {
-        // The main variables are listed at the beginning of a line
-        // with additional information indented. If one or more spaces
-        // are seen at the beginning of the line upon the entry to this
-        // while loop, it is because of that additional information.
-        // So, ignore this line since the variable is being ignored.
-        naclfile.ignore(OSIM_LINEMAX, '\n');
-      }
-    }
-  }
-}
-
 Lng32
 NAClusterInfo::getNumActiveCluster()
 {
@@ -804,10 +629,18 @@ void NAClusterInfo::setUseAggregationNodesOnly(NABoolean x)
 }
 
 Int32
+NAClusterInfo::numOfPhysicalSMPs()
+{
+  if (physicalSMPCount_ < 0)
+    physicalSMPCount_ = computeNumOfSMPs();
+
+  return physicalSMPCount_;
+}
+
+Int32
 NAClusterInfo::numOfSMPs()
 {
-  if(smpCount_ <0)
-    smpCount_ = computeNumOfSMPs();
+  CMPASSERT(physicalSMPCount_ > 0);
 
   // This is temporary patch for PARALLEL_NUM_ESPS issue. This CQD should
   // be used in many places for costing, NodeMap allocation, synthesizing
@@ -824,11 +657,11 @@ NAClusterInfo::numOfSMPs()
     // A value for PARALLEL_NUM_ESPS exists.  Use it for the count of cpus
     //  but don't exceed the number of cpus available in the cluster.
     // -------------------------------------------------------------------
-    smpCount_ = MINOF(smpCount_, 
+    physicalSMPCount_ = MINOF(physicalSMPCount_, 
         (Int32)(ActiveSchemaDB()->getDefaults().getAsLong(PARALLEL_NUM_ESPS)));
   }
 
-  return smpCount_; 
+  return physicalSMPCount_; 
 
 } // NAClusterInfo::numOfSMPs()  
 #pragma warn(1506)  // warning elimination 
@@ -987,7 +820,6 @@ void NAClusterInfo::cleanupPerStatement()
   //After every statement activeClusters_ should be NULL 
   // because statement heap has been cleared already. 
   activeClusters_ = NULL;
-  smpCount_ = -1;
  // reset the members for versioning support
   maxOSV_ = COM_VERS_UNKNOWN;
 }
@@ -1175,71 +1007,6 @@ void NAClusterInfoLinux::captureOSInfo(ofstream & nacllinuxfile)
const
                 << "numCPUcoresPerNode_: " << numCPUcoresPerNode_ << endl;
 }
 
-void NAClusterInfoLinux::simulateNAClusterInfoLinux()
-{
-  char var[256];
-  
-  const char* filepath = CURRCONTEXT_OPTSIMULATOR->getLogFilePath(OptimizerSimulator::NACLUSTERINFO);
-
-  ifstream nacllinuxfile(filepath);
-
-  if(!nacllinuxfile.good())
-  {
-    char errMsg[38+OSIM_PATHMAX+1]; // Error msg below + filename + '\0'
-   // LCOV_EXCL_START
-    snprintf(errMsg, sizeof(errMsg), "Unable to open %s file for reading data.", filepath);
-    OsimLogException(errMsg, __FILE__, __LINE__).throwException();
-   // LCOV_EXCL_STOP
-  }
-
-  while(nacllinuxfile.good())
-  {
-    // Read the variable name from the file
-    nacllinuxfile.getline(var, sizeof(var), ':');
-    if(!strcmp(var, "frequency_"))
-    {
-      nacllinuxfile >> frequency_; nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "iorate_"))
-    {
-      nacllinuxfile >> iorate_; nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "seekTime_"))
-    {
-      nacllinuxfile >> seekTime_; nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "pageSize_"))
-    {
-      nacllinuxfile >> pageSize_; nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "totalMemoryAvailable_"))
-    {
-      nacllinuxfile >> totalMemoryAvailable_; nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else if (!strcmp(var, "numCPUcoresPerNode_"))
-    {
-      nacllinuxfile >> numCPUcoresPerNode_; nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-    }
-    else
-    {
-      // This variable either may have been read in simulateNAClusterInfo()
-      // method of NAClusterInfo class or is not the one that we want to
-      // read here in this method. So discard it.
-      nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-      while (nacllinuxfile.peek() == ' ')
-      {
-        // The main variables are listed at the beginning of a line
-        // with additional information indented. If one or more spaces
-        // are seen at the beginning of the line upon the entry to this
-        // while loop, it is because of that additional information.
-        // So, ignore this line since the variable is being ignored.
-        nacllinuxfile.ignore(OSIM_LINEMAX, '\n');
-      }
-    }
-  }
-}
-
-
 Int32 compareTSEs( const void* a, const void* b ) 
 {  
   // compare function

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/NAClusterInfo.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NAClusterInfo.h b/core/sql/optimizer/NAClusterInfo.h
index 8292841..52ede89 100644
--- a/core/sql/optimizer/NAClusterInfo.h
+++ b/core/sql/optimizer/NAClusterInfo.h
@@ -75,7 +75,7 @@ extern void setUpClusterInfo(CollHeap* heap);
 #define MAX_NUM_SMPS_NSK  16     // number of SMPs in the cluster for NSK
 #define MAX_NUM_SMPS_SQ   512    // number of CPUs in the cluster for SQ
 //<pb>
-
+ULng32 clusterNumHashFunc(const CollIndex& num);
 
 //used to encapsulate dp2 names
 class DP2name : public NABasicObject
@@ -179,6 +179,9 @@ public:
   virtual size_t   totalMemoryAvailable() const = 0;
   virtual size_t   virtualMemoryAvailable() = 0;
 
+  // number of physical nodes (from Trafodion monitor or OSIM)
+  Int32 numOfPhysicalSMPs();
+  // this is an adjusted number, based on CQDs
   Int32 numOfSMPs();
 
   // This is called by captureNAClusterInfo() to capture the OSIM
@@ -247,10 +250,9 @@ protected :
   short localSMP_;
 
   //------------------------------------------------------------------------
-  // Earlier smpCount_ was the number of CPUs on a segment.  On Linux,
-  // smpCount_ is the number of Linux nodes in the cluster.
+  // physical number of SMPs/Linux nodes (real configuration or OSIM config)
   //------------------------------------------------------------------------
-  Int32 smpCount_;
+  Int32 physicalSMPCount_;
 
   //------------------------------------------------------------------------
   // heap_ is where this NAClusterInfo was allocated.  This should be the
@@ -309,8 +311,6 @@ public:
    float    seekTime() const;
    Int32      cpuArchitecture() const;
 
-   size_t   numLinuxNodes() const { return smpCount_; }
-
    //-------------------------------------------------------------------------
    // On Linux, numberOfCpusPerSMP() returns the number of Linux nodes in the
    // cluster.

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/NodeMap.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NodeMap.cpp b/core/sql/optimizer/NodeMap.cpp
index 1c4d5ee..a901948 100644
--- a/core/sql/optimizer/NodeMap.cpp
+++ b/core/sql/optimizer/NodeMap.cpp
@@ -2068,7 +2068,15 @@ short NodeMap::codeGen(const PartitioningFunction *partFunc,
 NABoolean
 NodeMap::hasRemotePartitions() const
 {
-  short sysNum = OSIM_MYSYSTEMNUMBER();
+  short sysNum;
+  try {
+      sysNum = OSIM_MYSYSTEMNUMBER();
+  }
+  catch(OsimLogException & e)
+  {
+        OSIM_errorMessage(e.getErrMessage());
+        return FALSE;
+  }
 
   for (ULng32 i = 0; i < getNumEntries(); i++) {
     const NodeMapEntry *ne = getNodeMapEntry(i); 
@@ -2106,7 +2114,7 @@ void NodeMap::assignScanInfos(HivePartitionAndBucketKey *hiveSearchKey)
   NABoolean useLocality = useLocalityForHiveScanInfo();
   // distribute <n> files associated the hive scan among numESPs.
   HiveFileIterator i;
-  HHDFSStatsBase selectedStats;
+  HHDFSStatsBase selectedStats(/* HHDFSTableStats *table */ NULL);  // TODO: fix this later
 
   CMPASSERT(type_ = HIVE);
   hiveSearchKey->accumulateSelectedStats(selectedStats);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/ObjectNames.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ObjectNames.cpp b/core/sql/optimizer/ObjectNames.cpp
index bff93d4..d309ca6 100644
--- a/core/sql/optimizer/ObjectNames.cpp
+++ b/core/sql/optimizer/ObjectNames.cpp
@@ -356,6 +356,23 @@ const NAString SchemaName::getSchemaNameAsAnsiString() const
   return result;
 }
 
+void SchemaName::getHiveSchemaName(
+     NAString &hiveSchemaName         // OUT: Hive schema name
+                                   ) const
+{
+  NAString hiveDefSchema =
+    ActiveSchemaDB()->getDefaults().getValue(HIVE_DEFAULT_SCHEMA);
+  hiveDefSchema.toUpper();
+
+  if (schemaName_ == hiveDefSchema)
+    hiveSchemaName = HiveMetaData::getDefaultSchemaName();
+  else
+    {
+      hiveSchemaName = schemaName_;
+      hiveSchemaName.toLower();
+    }
+}
+
 
 // ODBC SHORTANSI -- the actual MPLoc is encoded in the schName
 // using underscore delimiters, i.e. "systemName_volumeName_subvolName".

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/ObjectNames.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ObjectNames.h b/core/sql/optimizer/ObjectNames.h
index dadffe5..ee3b467 100644
--- a/core/sql/optimizer/ObjectNames.h
+++ b/core/sql/optimizer/ObjectNames.h
@@ -188,6 +188,9 @@ public:
   const NAString  getUnqualifiedSchemaNameAsAnsiString() const
 				 { return ToAnsiIdentifier(schemaName_); }
 
+  // Translate Trafodion to Hive schema
+  void getHiveSchemaName(NAString &hiveSchemaName) const;
+
   // mutator
   void setSchemaName(const NAString &schName)   { schemaName_ = schName; }
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/802029e5/core/sql/optimizer/OptPhysRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/OptPhysRelExpr.cpp b/core/sql/optimizer/OptPhysRelExpr.cpp
index 392ee8d..2eb90f2 100644
--- a/core/sql/optimizer/OptPhysRelExpr.cpp
+++ b/core/sql/optimizer/OptPhysRelExpr.cpp
@@ -14506,7 +14506,7 @@ PhysicalProperty * FileScan::synthHiveScanPhysicalProperty(
   const HHDFSTableStats *tableStats = hiveSearchKey_->getHDFSTableStats();
 
   // stats for partitions/buckets selected by predicates
-  HHDFSStatsBase selectedStats;
+  HHDFSStatsBase selectedStats((HHDFSTableStats *)tableStats);
 
   hiveSearchKey_->accumulateSelectedStats(selectedStats);
 


Mime
View raw message