trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sure...@apache.org
Subject [10/50] [abbrv] trafodion git commit: Fixed parent clone issue and ptpCommAccept thread shutdown logic Added -nid <nid> argument to specify shell attach to a <nid> when in virtual cluster.
Date Sat, 16 Jun 2018 17:09:40 GMT
Fixed parent clone issue and ptpCommAccept thread shutdown logic.
Added -nid <nid> argument so the shell attaches to the specified <nid> when running in a virtual cluster.


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/6dc990fe
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/6dc990fe
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/6dc990fe

Branch: refs/heads/master
Commit: 6dc990fe25798d46b51fb5c4932d92d58ad6aa0a
Parents: 38eb84e
Author: Zalo Correa <zalo.correa@esgyn.com>
Authored: Thu Mar 29 17:17:47 2018 -0700
Committer: Zalo Correa <zalo.correa@esgyn.com>
Committed: Thu Mar 29 17:17:47 2018 -0700

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx             |  61 +++++++----
 core/sqf/monitor/linux/cluster.h               |   8 +-
 core/sqf/monitor/linux/monitor.cxx             |   2 +-
 core/sqf/monitor/linux/notice.cxx              |  57 ++++++----
 core/sqf/monitor/linux/pnode.cxx               |   2 +
 core/sqf/monitor/linux/pnode.h                 |   9 +-
 core/sqf/monitor/linux/process.cxx             | 112 +++++++++++++-------
 core/sqf/monitor/linux/process.h               |   5 +-
 core/sqf/monitor/linux/ptpclient.cxx           |  82 +++++---------
 core/sqf/monitor/linux/ptpclient.h             |   6 +-
 core/sqf/monitor/linux/ptpcommaccept.cxx       |   6 +-
 core/sqf/monitor/linux/shell.cxx               |  38 ++++++-
 core/sqf/monitor/test/monitor.env              |  18 ++--
 core/sqf/monitor/test/runtest                  |  46 +++-----
 core/sqf/monitor/test/sqconfig.monitor.virtual |   5 +-
 15 files changed, 270 insertions(+), 187 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index 101cec2..070230e 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -86,7 +86,7 @@ extern CCommAcceptMon CommAcceptMon;
 extern char MyMon2NsPort[MPI_MAX_PORT_NAME];
 #else
 extern bool NameServerEnabled;
-extern char MyMon2MonPort[MPI_MAX_PORT_NAME];
+extern char MyPtPPort[MPI_MAX_PORT_NAME];
 #endif
 extern bool SMSIntegrating;
 extern int CreatorShellPid;
@@ -8315,7 +8315,7 @@ void CCluster::InitServerSock( void )
 #ifdef NAMESERVER_PROCESS
     int mon2nsPort = 0;
 #else
-    int mon2monPort = 0;
+    int ptpPort = 0;
 #endif
 
     unsigned char addr[4];
@@ -8490,7 +8490,7 @@ void CCluster::InitServerSock( void )
             int val;
             errno = 0;
             val = strtol(env, NULL, 10);
-            if ( errno == 0) mon2monPort = val;
+            if ( errno == 0) ptpPort = val;
         }
         else
         {
@@ -8505,38 +8505,39 @@ void CCluster::InitServerSock( void )
         // For virtual env, add PNid to the port so we can still test without collisions of port numbers
         if (!IsRealCluster)
         {
-            mon2monPort += MyNode->GetPNid();
+            ptpPort += MyNode->GetPNid();
         }
     
-        mon2monSock_ = MkSrvSock( &mon2monPort );
-        if ( mon2monSock_ < 0 )
+        ptpSock_ = MkSrvSock( &ptpPort );
+        if ( ptpSock_ < 0 )
         {
             char ebuff[MON_STRING_BUF_SIZE];
             char buf[MON_STRING_BUF_SIZE];
             snprintf( buf, sizeof(buf)
                     , "[%s@%d] MkSrvSock(MON2MON_COMM_PORT=%d) error: %s\n"
-                    , method_name, __LINE__, mon2monPort
+                    , method_name, __LINE__, ptpPort
                     , strerror_r( errno, ebuff, MON_STRING_BUF_SIZE ) );
             mon_log_write( MON_CLUSTER_INITSERVERSOCK_6, SQ_LOG_CRIT, buf );
             abort();
         }
         else
         {
-            snprintf( MyMon2MonPort, sizeof(MyMon2MonPort)
+            snprintf( MyPtPPort, sizeof(MyPtPPort)
                     , "%d.%d.%d.%d:%d"
                     , (int)((unsigned char *)addr)[0]
                     , (int)((unsigned char *)addr)[1]
                     , (int)((unsigned char *)addr)[2]
                     , (int)((unsigned char *)addr)[3]
-                    , mon2monPort );
-            MyNode->SetMon2MonPort( MyMon2MonPort );
+                    , ptpPort );
+            MyNode->SetPtPPort( MyPtPPort );
+            MyNode->SetPtPSocketPort( ptpPort );
     
             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
-                trace_printf( "%s@%d Initialized my mon2mon socket port, "
-                              "pnid=%d (%s:%s) (mon2monPort=%s)\n"
+                trace_printf( "%s@%d Initialized my ptp socket port, "
+                              "pnid=%d (%s:%s) (ptpPort=%s)\n"
                             , method_name, __LINE__
-                            , MyPNID, MyNode->GetName(), MyMon2MonPort
-                            , MyNode->GetMon2MonPort() );
+                            , MyPNID, MyNode->GetName(), MyPtPPort
+                            , MyNode->GetPtPPort() );
     
         }
     }
@@ -8579,12 +8580,12 @@ int CCluster::AcceptSyncSock( void )
 }
 
 #ifndef NAMESERVER_PROCESS
-int CCluster::AcceptMon2MonSock( void )
+int CCluster::AcceptPtPSock( void )
 {
-    const char method_name[] = "CCluster::AcceptMon2MonSock";
+    const char method_name[] = "CCluster::AcceptPtPSock";
     TRACE_ENTRY;
 
-    int csock = AcceptSock( mon2monSock_ );
+    int csock = AcceptSock( ptpSock_ );
 
     TRACE_EXIT;
     return( csock  );
@@ -8871,11 +8872,33 @@ int CCluster::Connect( const char *portName )
     return ( sock );
 }
 
+#ifndef NAMESERVER_PROCESS
+void CCluster::ConnectToPtPCommSelf( void )
+{
+    const char method_name[] = "CCluster::ConnectToPtPCommSelf";
+    TRACE_ENTRY;
+
+    Connect( MyNode->GetPtPSocketPort() );
+
+    TRACE_EXIT;
+}
+#endif
+
 void CCluster::ConnectToSelf( void )
 {
     const char method_name[] = "CCluster::ConnectToSelf";
     TRACE_ENTRY;
 
+    Connect( MyNode->GetCommSocketPort() );
+
+    TRACE_EXIT;
+}
+
+void CCluster::Connect( int socketPort )
+{
+    const char method_name[] = "CCluster::Connect";
+    TRACE_ENTRY;
+
     int  sock;     // socket
     int  ret;      // returned value
 #if defined(_XOPEN_SOURCE_EXTENDED)
@@ -8928,7 +8951,7 @@ void CCluster::ConnectToSelf( void )
     memset( (char *) &sockinfo, 0, size );
     memcpy( (char *) &sockinfo.sin_addr, (char *) he->h_addr, 4 );
     sockinfo.sin_family = AF_INET;
-    sockinfo.sin_port = htons( (unsigned short) MyNode->GetCommSocketPort() );
+    sockinfo.sin_port = htons( (unsigned short) socketPort );
 
     connect_failures = 0;
     ret = 1;
@@ -8942,7 +8965,7 @@ void CCluster::ConnectToSelf( void )
                         , (int)((unsigned char *)he->h_addr)[1]
                         , (int)((unsigned char *)he->h_addr)[2]
                         , (int)((unsigned char *)he->h_addr)[3]
-                        , MyNode->GetCommSocketPort()
+                        , socketPort
                         , connect_failures );
         }
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/cluster.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.h b/core/sqf/monitor/linux/cluster.h
index 90c2092..f4f9147 100644
--- a/core/sqf/monitor/linux/cluster.h
+++ b/core/sqf/monitor/linux/cluster.h
@@ -109,9 +109,13 @@ public:
 #ifdef NAMESERVER_PROCESS
     int  AcceptMon2NsSock( void );
 #else
-    int  AcceptMon2MonSock( void );
+    int  AcceptPtPSock( void );
 #endif
     int  Connect( const char *portName );
+    void Connect( int socketPort );
+#ifndef NAMESERVER_PROCESS
+    void ConnectToPtPCommSelf( void );
+#endif
     void ConnectToSelf( void );
     int  SetKeepAliveSockOpt( int sock );
     int  MkCltSock( const char *portName );
@@ -231,7 +235,7 @@ protected:
 #ifdef NAMESERVER_PROCESS
     int            mon2nsSock_;
 #else
-    int            mon2monSock_;
+    int            ptpSock_;
 #endif
     int            epollFD_;
     int           *indexToPnid_;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/monitor.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/monitor.cxx b/core/sqf/monitor/linux/monitor.cxx
index 77d7509..e6aa68b 100755
--- a/core/sqf/monitor/linux/monitor.cxx
+++ b/core/sqf/monitor/linux/monitor.cxx
@@ -111,7 +111,7 @@ char MySyncPort[MPI_MAX_PORT_NAME] = {'\0'};
 #ifdef NAMESERVER_PROCESS
 char MyMon2NsPort[MPI_MAX_PORT_NAME] = {'\0'};
 #else
-char MyMon2MonPort[MPI_MAX_PORT_NAME] = {'\0'};
+char MyPtPPort[MPI_MAX_PORT_NAME] = {'\0'};
 #endif
 char Node_name[MPI_MAX_PROCESSOR_NAME] = {'\0'};
 sigset_t SigSet;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/notice.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/notice.cxx b/core/sqf/monitor/linux/notice.cxx
index 85a9eab..f478f45 100644
--- a/core/sqf/monitor/linux/notice.cxx
+++ b/core/sqf/monitor/linux/notice.cxx
@@ -269,8 +269,9 @@ void CNotice::Notify( SQ_LocalIOToClient::bcastPids_t *bcastPids )
 
 
                     if (trace_settings & (TRACE_SYNC | TRACE_REQUEST | TRACE_PROCESS))
-                        trace_printf( "%s@%d - Sending %s (%d, %d:%d) Death "
-                                      "message to %s (%d, %d:%d)\n"
+                    {
+                        trace_printf( "%s@%d - Sending Death message of"
+                                      " %s (%d, %d:%d) to %s (%d, %d:%d)\n"
                                     , method_name, __LINE__
                                     , Process->GetName()
                                     , Process->GetNid()
@@ -280,41 +281,55 @@ void CNotice::Notify( SQ_LocalIOToClient::bcastPids_t *bcastPids )
                                     , notify->GetNid()
                                     , notify->GetPid()
                                     , notify->GetVerifier());
-
-
+                    }
                 }
                 else
                 {
                     if (trace_settings & (TRACE_SYNC | TRACE_REQUEST | TRACE_PROCESS))
-                        trace_printf( "%s@%d - Process %s (%d, %d:%d)" 
-                                      " doesn't want Death message" "\n"
+                    {
+                        trace_printf( "%s@%d - Death message of %s (%d, %d:%d)" 
+                                      " not wanted by %s (%d, %d:%d)\n"
                                     , method_name, __LINE__
+                                    , Process->GetName()
+                                    , Process->GetNid()
+                                    , Process->GetPid()
+                                    , Process->GetVerifier()
                                     , notify->GetName()
                                     , notify->GetNid()
                                     , notify->GetPid()
-                                    , notify->GetVerifier() );
+                                    , notify->GetVerifier());
+                    }
                 }
             }
             else
             {
                 if (trace_settings & (TRACE_SYNC | TRACE_REQUEST | TRACE_PROCESS))
-                    trace_printf( "%s@%d - Not processed for clone Process %s (%d, %d:%d)\n"
+                {
+                    trace_printf( "%s@%d - Death message of %s (%d, %d:%d)" 
+                                  " not processed for clone %s (%d, %d:%d)\n"
                                 , method_name, __LINE__
+                                , Process->GetName()
+                                , Process->GetNid()
+                                , Process->GetPid()
+                                , Process->GetVerifier()
                                 , notify->GetName()
                                 , notify->GetNid()
                                 , notify->GetPid()
-                                , notify->GetVerifier() );
+                                , notify->GetVerifier());
+                }
             }
         }
         else
         {
             if (trace_settings & (TRACE_SYNC | TRACE_REQUEST | TRACE_PROCESS))
-               trace_printf( "%s@%d - Can't find process %s (%d, %d:%d)\n"
-                           , method_name, __LINE__
-                           , name_.c_str()
-                           , Nid
-                           , Pid
-                           , verifier_ );
+            {
+                trace_printf( "%s@%d - Can't find process %s (%d, %d:%d)\n"
+                            , method_name, __LINE__
+                            , name_.c_str()
+                            , Nid
+                            , Pid
+                            , verifier_ );
+            }
         }
     }
     TRACE_EXIT;
@@ -456,8 +471,8 @@ void CNotice::NotifyNid( NidQueue_t *nidQueue )
                     if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
                     {
                         CLNode *lnode = Nodes->GetLNode( Nid );
-                        trace_printf( "%s@%d - Sending process %s (%d, %d:%d) "
-                                      "exit message to %s (nid=%d)\n"
+                        trace_printf( "%s@%d - Sending exit message of"
+                                      " %s (%d, %d:%d) to %s (nid=%d)\n"
                                     , method_name, __LINE__
                                     , Process->GetName()
                                     , Process->GetNid()
@@ -471,9 +486,13 @@ void CNotice::NotifyNid( NidQueue_t *nidQueue )
                 {
                     if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
                     {
-                        trace_printf( "%s@%d - Process %s (%d, %d:%d)" 
-                                      " doesn't want Death message\n"
+                        trace_printf( "%s@%d - Death message of %s (%d, %d:%d)" 
+                                      " not wanted by %s (%d, %d:%d)\n"
                                     , method_name, __LINE__
+                                    , Process->GetName()
+                                    , Process->GetNid()
+                                    , Process->GetPid()
+                                    , Process->GetVerifier()
                                     , remoteProcess->GetName()
                                     , remoteProcess->GetNid()
                                     , remoteProcess->GetPid()

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index e3923a0..6affd4f 100644
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -169,6 +169,8 @@ CNode::CNode( char *name, int pnid, int rank )
       ,zid_(pnid)
 #ifdef NAMESERVER_PROCESS
       ,monConnCount_(0)
+#else
+      ,ptpSocketPort_(-1)
 #endif
       ,commSocketPort_(-1)
       ,syncSocketPort_(-1)

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/pnode.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h
index 48af3bc..f0b89bb 100644
--- a/core/sqf/monitor/linux/pnode.h
+++ b/core/sqf/monitor/linux/pnode.h
@@ -245,7 +245,8 @@ public:
     inline const char *GetMon2NsPort( void ) { return mon2NsPort_.c_str(); }
     inline int GetMonConnCount( void ) { return monConnCount_; }
 #else
-    inline const char *GetMon2MonPort( void ) { return mon2MonPort_.c_str(); }
+    inline const char *GetPtPPort( void ) { return ptpPort_.c_str(); }
+    inline int   GetPtPSocketPort( void ) { return( ptpSocketPort_ ); }
 #endif
     inline int   GetCommSocketPort( void ) { return( commSocketPort_ ); }
     inline int   GetSyncSocketPort( void ) { return( syncSocketPort_ ); }
@@ -315,7 +316,8 @@ public:
 #ifdef NAMESERVER_PROCESS
     inline void SetMon2NsPort( char *mon2NsPort) { mon2NsPort_ = mon2NsPort; }
 #else
-    inline void SetMon2MonPort( char *mon2MonPort) { mon2MonPort_ = mon2MonPort; }  
+    inline void SetPtPPort( char *ptpPort) { ptpPort_ = ptpPort; }  
+    inline void SetPtPSocketPort( int ptpSocketPort) { ptpSocketPort_ = ptpSocketPort; }
 #endif
     //inline void SetSockPort( int sockPort ) { sockPort_ = sockPort; }
     inline void SetCommSocketPort( int commSocketPort) { commSocketPort_ = commSocketPort; }
@@ -414,7 +416,8 @@ private:
     string        mon2NsPort_;        // monitor to ns port
     int           monConnCount_;      // monitor connections
 #else
-    string        mon2MonPort_;
+    string        ptpPort_;
+    int           ptpSocketPort_;           // point-2-point socket port
 #endif
     int           commSocketPort_;          // re-integration socket port
     int           syncSocketPort_;          // algather socket port

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/process.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/process.cxx b/core/sqf/monitor/linux/process.cxx
index 02d6276..017ce76 100644
--- a/core/sqf/monitor/linux/process.cxx
+++ b/core/sqf/monitor/linux/process.cxx
@@ -545,11 +545,14 @@ bool CProcess::procExitReg(CProcess *targetProcess,
     {   // This process is not the parent of the target process (parent
         // processes automatically get process death notifications.)
 
-        // Add entry to list of processes that are being monitored
-        // by this process.
         nidPid_t target = { targetProcess->Nid, targetProcess->Pid };
         deathInterestLock_.lock();
-        deathInterest_.push_back ( target );
+        // Add entry to list of processes that are being monitored
+        // by this process.
+        deathInterest_.push_back( target );
+        // Add entry to set of nids of processes that are being monitored
+        // by this process.
+        deathInterestNid_.insert( targetProcess->Nid );
         deathInterestLock_.unlock();
 
         // Register interest with the target process 
@@ -580,12 +583,54 @@ bool CProcess::procExitReg(CProcess *targetProcess,
 #endif
 
 #ifndef NAMESERVER_PROCESS
+void CProcess::procExitNotifierNodes( void )
+{
+    const char method_name[] = "CProcess::procExitNotifierNodes";
+    TRACE_ENTRY;
+
+    CLNode *targetLNode;
+    CNode  *targetNode;
+    nidSet_t::iterator it;
+
+    // Remove death notice registration for all entries on list
+    deathInterestLock_.lock();
+    for ( it = deathInterestNid_.begin(); it != deathInterestNid_.end(); ++it)
+    {
+        targetLNode = Nodes->GetLNode ( *it );
+        if (targetLNode)
+        {
+            targetNode = targetLNode->GetNode();
+        }
+
+        if ( targetNode )
+        {
+            if (NameServerEnabled && targetNode->GetPNid() != MyPNID)
+            {
+                int rc = -1;
+                // Forward the process exit to the target node
+                rc = PtpClient->ProcessExit( this 
+                                           , targetLNode->GetNid()
+                                           , targetNode->GetName() ); 
+                if (rc)
+                {
+                    // TODO: Error handling
+                }
+            }
+        }
+    }
+    deathInterestNid_.clear();
+    deathInterestLock_.unlock();
+
+    TRACE_EXIT;
+}
+#endif
+
+#ifndef NAMESERVER_PROCESS
 void CProcess::procExitUnregAll ( _TM_Txid_External transId )
 {
     const char method_name[] = "CProcess::procExitUnregAll";
     TRACE_ENTRY;
 
-    nidPidList_t::iterator iter;
     CLNode *node;
     CProcess *targetProcess = NULL;
     nidPidList_t::iterator it;
@@ -3222,48 +3267,39 @@ void CProcess::Exit( CProcess *parent )
 #ifndef NAMESERVER_PROCESS
     if (NameServerEnabled)
     {
-        if ( parent && parent->IsClone() && Pid != -1 )
+        if ( parent )
         {
-            int targetNid = parent->GetNid();
-            CLNode *targetLNode = Nodes->GetLNode( targetNid );
-            // Send the process exit to the target node
-            int rc = PtpClient->ProcessExit( this
-                                           , targetNid
-                                           , targetLNode->GetNode()->GetName() );
-            if (rc)
+            if ( parent->IsClone() && Pid != -1 )
             {
-                // TODO: Error handling
+                int targetNid = parent->GetNid();
+                CLNode *targetLNode = Nodes->GetLNode( targetNid );
+                // Send the process exit to the parent node
+                int rc = PtpClient->ProcessExit( this
+                                               , targetNid
+                                               , targetLNode->GetNode()->GetName() );
+                if (rc)
+                {
+                    // TODO: Error handling
+                }
             }
-#if 0
-            // TODO: This is not the correct place. It needs to be found!
-            //       When the parent process is in a remote node and
-            //       the local node contains child processes,
-            //       a clone of the parent is created at child creation time,
-            //       when all child processes are deleted, it leaves the
-            //       parent clone process. Need to determine when all
-            //       child process objects which reference the parent clone
-            //       are deleted so the parent clone object can be deleted.
-            //       The symptom is that shutdown never occurs since there
-            //       are object which have not been deleted and the process
-            //       counts prevent the shutdown from completing.
-            if (parent->childCount() == 0)
+        }
+        else
+        {
+            if (GetParentNid() != -1)
             {
-                if (trace_settings & (TRACE_SYNC_DETAIL | TRACE_REQUEST_DETAIL | TRACE_PROCESS_DETAIL))
+                int targetNid = GetParentNid();
+                CLNode *targetLNode = Nodes->GetLNode( targetNid );
+                // Send the process exit to the parent node
+                int rc = PtpClient->ProcessExit( this
+                                               , targetNid
+                                               , targetLNode->GetNode()->GetName() );
+                if (rc)
                 {
-                    trace_printf( "%s@%d" " - Deleting parent %s (%d,%d:%d) of last child %s (%d,%d:%d) \n"
-                                , method_name, __LINE__
-                                , parent->GetName(), parent->GetNid()
-                                , parent->GetPid(), parent->GetVerifier()
-                                , GetName(), GetNid(), GetPid(), GetVerifier() );
+                    // TODO: Error handling
                 }
-
-                CNode *parentNode = Nodes->GetLNode(parent->GetNid())->GetNode();
-                parentNode->DelFromNameMap( parent );
-                parentNode->DelFromPidMap( parent );
-                parentNode->DeleteFromList( parent );
             }
-#endif
         }
+        procExitNotifierNodes();
     }
 #endif
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/process.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/process.h b/core/sqf/monitor/linux/process.h
index 9445f6e..90954f3 100644
--- a/core/sqf/monitor/linux/process.h
+++ b/core/sqf/monitor/linux/process.h
@@ -409,6 +409,7 @@ class CProcess
 
 
     bool procExitReg(CProcess *targetProcess, _TM_Txid_External transId);
+    void procExitNotifierNodes( void );
     void procExitUnregAll( _TM_Txid_External transId );
 
     void validateObj( void );
@@ -549,8 +550,10 @@ private:
     // Container to keep track of the processes for which this process
     // is interested in process death.  deathInterestLock_ is used to
     // protect both the deathInterest_ and CNotice list.
+    typedef set<int> nidSet_t;
     nidPidList_t deathInterest_;
-    CLock       deathInterestLock_;
+    nidSet_t     deathInterestNid_;
+    CLock        deathInterestLock_;
 
     CNotice       *NoticeHead;   // List of processes requesting death notice 
     CNotice       *NoticeTail;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/ptpclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/ptpclient.cxx b/core/sqf/monitor/linux/ptpclient.cxx
index 5e1380e..4f3f292 100644
--- a/core/sqf/monitor/linux/ptpclient.cxx
+++ b/core/sqf/monitor/linux/ptpclient.cxx
@@ -57,7 +57,7 @@ extern bool IsRealCluster;
 extern CMeas Meas;
 
 CPtpClient::CPtpClient (void)
-          : mon2monSock_(0)
+          : ptpSock_(0)
           , seqNum_(0)
 {
     const char method_name[] = "CPtpClient::CPtpClient";
@@ -89,13 +89,13 @@ CPtpClient::~CPtpClient (void)
     TRACE_EXIT;
 }
 
-int CPtpClient::InitializePtpClient( char * mon2monPort )
+int CPtpClient::InitializePtpClient( char * ptpPort )
 {
     const char method_name[] = "CPtpClient::InitializePtpClient";
     TRACE_ENTRY;
     int err = 0;
       
-    int sock = Monitor->MkCltSock( mon2monPort );                
+    int sock = Monitor->MkCltSock( ptpPort );                
     if (sock < 0)
     {
         err = sock;
@@ -108,50 +108,16 @@ int CPtpClient::InitializePtpClient( char * mon2monPort )
     }
     else
     {
-        mon2monSock_ = sock;
+        ptpSock_ = sock;
         if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
         {
             trace_printf( "%s@%d - connected to monitor node=%s, sock=%d\n"
                         , method_name, __LINE__
-                        , mon2monPort
-                        , mon2monSock_ );
+                        , ptpPort
+                        , ptpSock_ );
         }
     }
-#if 0    
-    // remove
-    if (err == 0)
-    {
-        nodeId_t msg;
-        strcpy(msg.nodeName, MyNode->GetName());
-        strcpy(msg.commPort, MyNode->GetCommPort());
-        strcpy(msg.syncPort, MyNode->GetSyncPort());
-        msg.pnid = MyNode->GetPNid();
-        msg.creatorPNid = -1;
-        msg.creatorShellPid = -1;
-        msg.creatorShellVerifier = -1;
-        msg.creator = false;
-        msg.ping = false;
-        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
-        {
-            trace_printf( "%s@%d - sending node-info to monitor=%s, sock=%d\n"
-                        , method_name, __LINE__
-                        , mon2monPort
-                        , mon2monSock_);
-        }
-        err = SendSock((char *) &msg, sizeof(msg), mon2monSock_);
-        if (err)
-        {
-            if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
-            {
-                trace_printf( "%s@%d - error sending to monitor=%s, sock=%d, error=%d\n"
-                            , method_name, __LINE__
-                            , mon2monPort
-                            , mon2monSock_
-                            , err );
-            }
-        }
-    }
-#endif
+
     TRACE_EXIT;
     return err;
 }
@@ -807,7 +773,7 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size,
     TRACE_ENTRY;
     
     char monPortString[MAX_PROCESSOR_NAME];
-    char mon2monPort[MAX_PROCESSOR_NAME];
+    char ptpPort[MAX_PROCESSOR_NAME];
     int tempPort = basePort_;
     
     // For virtual env
@@ -828,16 +794,16 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size,
                     , basePort_ );
     }
 
-    memset( &mon2monPort, 0, MAX_PROCESSOR_NAME );
-    memset( &mon2monPortBase_, 0, MAX_PROCESSOR_NAME+100 );
+    memset( &ptpPort, 0, MAX_PROCESSOR_NAME );
+    memset( &ptpPortBase_, 0, MAX_PROCESSOR_NAME+100 );
 
-    strcat( mon2monPortBase_, hostName );
-    strcat( mon2monPortBase_, ":" );
+    strcat( ptpPortBase_, hostName );
+    strcat( ptpPortBase_, ":" );
     sprintf( monPortString,"%d", tempPort );
-    strcat( mon2monPort, mon2monPortBase_ );
-    strcat( mon2monPort, monPortString ); 
+    strcat( ptpPort, ptpPortBase_ );
+    strcat( ptpPort, monPortString ); 
 
-    int error = InitializePtpClient( mon2monPort );
+    int error = InitializePtpClient( ptpPort );
     if (error < 0)
     {
         TRACE_EXIT;
@@ -849,37 +815,37 @@ int CPtpClient::SendToMon(const char *reqType, internal_msg_def *msg, int size,
         trace_printf( "%s@%d - sending %s REQ to Monitor=%s, sock=%d\n"
                     , method_name, __LINE__
                     , reqType
-                    , mon2monPort
-                    , mon2monSock_);
+                    , ptpPort
+                    , ptpSock_);
     }
 
-    error = SendSock((char *) &size, sizeof(size), mon2monSock_);
+    error = SendSock((char *) &size, sizeof(size), ptpSock_);
     if (error)
     {
         if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
         {
             trace_printf( "%s@%d - error sending to Monitor=%s, sock=%d, error=%d\n"
                         , method_name, __LINE__
-                        , mon2monPort
-                        , mon2monSock_
+                        , ptpPort
+                        , ptpSock_
                         , error );
         }
     }
     
-    error = SendSock((char *) msg, size, mon2monSock_);
+    error = SendSock((char *) msg, size, ptpSock_);
     if (error)
     {
         if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
         {
             trace_printf( "%s@%d - error sending to nameserver=%s, sock=%d, error=%d\n"
                         , method_name, __LINE__
-                        , mon2monPort
-                        , mon2monSock_
+                        , ptpPort
+                        , ptpSock_
                         , error );
         }
     }
     
-    close( mon2monSock_ );
+    close( ptpSock_ );
 
     TRACE_EXIT;
     return error;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/ptpclient.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/ptpclient.h b/core/sqf/monitor/linux/ptpclient.h
index 7dd8b86..554bc71 100644
--- a/core/sqf/monitor/linux/ptpclient.h
+++ b/core/sqf/monitor/linux/ptpclient.h
@@ -40,7 +40,7 @@ public:
     CPtpClient( void );
     virtual ~CPtpClient( void );
 
-    int  InitializePtpClient( char * mon2monPort );
+    int  InitializePtpClient( char * ptpPort );
     int  ProcessClone( CProcess *process );
     int  ProcessExit( CProcess* process
                     , int parentNid
@@ -67,8 +67,8 @@ public:
 
 private:
     int  basePort_;
-    char mon2monPortBase_[MAX_PROCESSOR_NAME+100];
-    int  mon2monSock_;
+    char ptpPortBase_[MAX_PROCESSOR_NAME+100];
+    int  ptpSock_;
     int  seqNum_;
 
     int  ReceiveSock(char *buf, int size, int sockFd);

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/ptpcommaccept.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/ptpcommaccept.cxx b/core/sqf/monitor/linux/ptpcommaccept.cxx
index b070508..c6d5145 100644
--- a/core/sqf/monitor/linux/ptpcommaccept.cxx
+++ b/core/sqf/monitor/linux/ptpcommaccept.cxx
@@ -39,7 +39,7 @@ extern CMonitor *Monitor;
 extern CNode *MyNode;
 extern CNodeContainer *Nodes;
 extern int MyPNID;
-extern char MyMon2MonPort[MPI_MAX_PORT_NAME];
+extern char MyPtPPort[MPI_MAX_PORT_NAME];
 extern char *ErrorMsg (int error_code);
 extern const char *StateString( STATE state);
 extern CommType_t CommType;
@@ -235,7 +235,7 @@ void CPtpCommAccept::commAcceptorSock()
             }
     
             mem_log_write(CMonLog::MON_CONNTONEWMON_1);
-            sockFd = Monitor->AcceptMon2MonSock();
+            sockFd = Monitor->AcceptPtPSock();
         }
         else
         {
@@ -287,7 +287,7 @@ void CPtpCommAccept::shutdownWork(void)
 
     // Set flag that tells the PtpCommAccept thread to exit
     shutdown_ = true;   
-    Monitor->ConnectToSelf();
+    Monitor->ConnectToPtPCommSelf();
     CLock::wakeOne();
 
     if (trace_settings & TRACE_INIT)

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/linux/shell.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/shell.cxx b/core/sqf/monitor/linux/shell.cxx
index 52afe06..20b41e6 100644
--- a/core/sqf/monitor/linux/shell.cxx
+++ b/core/sqf/monitor/linux/shell.cxx
@@ -82,6 +82,7 @@ char Path[MAX_SEARCH_PATH];
 char Wdir[MAX_SEARCH_PATH];
 char prompt[13];
 int VirtualNodes = 0;
+int VirtualNid = -1;
 int NumNodes = 0;
 int NumLNodes = 0;
 int CurNodes = 0;
@@ -848,7 +849,7 @@ void TraceInit( int & argc, char **&argv )
             // line arguments.
             for (int j=i, k=i+2; k < argc; j++, k++)
             {
-                printf ("setting argv[%d] = argv[%d]\n", j, k);
+                //printf ("setting argv[%d] = argv[%d]\n", j, k);
                 argv[j] = argv[k];
             }
             argc -= 2;
@@ -868,6 +869,32 @@ void TraceInit( int & argc, char **&argv )
     }
 }
 
+void VirtualNidInit( int & argc, char **&argv )
+{
+    // Check for a "-nid <nid>" argument specified on the command line.
+    for (int i = 0; i < argc; i++)
+    {
+        if ( strcmp ( argv[i], "-nid" ) == 0 && (i != argc-1) )
+        {   // <nid> setting specified on command line.
+            VirtualNid = atoi ( argv[i+1] );
+
+            // Remove the virtual nid arguments from the list of command
+            // line arguments.
+            for (int j=i, k=i+2; k < argc; j++, k++)
+            {
+                //printf ("setting argv[%d] = argv[%d]\n", j, k);
+                argv[j] = argv[k];
+            }
+            argc -= 2;
+        }
+    }
+
+    if (VirtualNid != -1)
+    {
+        printf( "Using VirtualNid=%d\n", VirtualNid );
+    }
+}
+
 void RedirectFd(int orig_fd, char *fifo_name)
 {
     int rdir_fd;
@@ -9302,6 +9329,9 @@ int main (int argc, char *argv[])
     // Initialize trace settings
     TraceInit ( argc, argv );
 
+    // Initialize virtual <nid> from command line args
+    VirtualNidInit( argc, argv );
+    
     MyName = new char [MAX_PROCESS_PATH];
     // setup defaults
     strcpy (MyName, "SHELL");
@@ -9340,6 +9370,12 @@ int main (int argc, char *argv[])
         MyNid = 0;
     }
 
+    if ( VirtualNodes && VirtualNid != -1)
+    {
+        // Override MyNid with the command line nid value
+        MyNid = VirtualNid;
+    }
+
     msg = new struct message_def;
 
     // Load default node information

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/test/monitor.env
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/test/monitor.env b/core/sqf/monitor/test/monitor.env
index b8cf913..dd2cbb3 100644
--- a/core/sqf/monitor/test/monitor.env
+++ b/core/sqf/monitor/test/monitor.env
@@ -40,21 +40,21 @@ MONITOR_COMM_PORT=23330
 
 # Uncomment MON_TRACE_ENABLE and specific tracing level to enable 
 # Trafodion monitor process tracing
-#MON_TRACE_ENABLE=1
-#MON_TRACE_EVLOG_MSG=1
-#MON_TRACE_INIT=1
-#MON_TRACE_RECOVERY=1
-#MON_TRACE_REQUEST=1
-#MON_TRACE_PROCESS=1
-#MON_TRACE_NOTICE=1
-#MON_TRACE_NS=1
+MON_TRACE_ENABLE=1
+MON_TRACE_EVLOG_MSG=1
+MON_TRACE_INIT=1
+MON_TRACE_RECOVERY=1
+MON_TRACE_REQUEST=1
+MON_TRACE_PROCESS=1
+MON_TRACE_NOTICE=1
+MON_TRACE_NS=1
 #MON_TRACE_SYNC=1
 # Enable TC_TRACE_* along with MON_TRACE_TRAFCONFIG for more detail
 #MON_TRACE_TRAFCONFIG=1
 #MON_TRACE_MLIO=1
 
 #MON_TRACE_REQUEST_DETAIL=1
-#MON_TRACE_PROCESS_DETAIL=1
+MON_TRACE_PROCESS_DETAIL=1
 #MON_TRACE_NOTICE_DETAIL=1
 #MON_TRACE_SYNC_DETAIL=1
 #MON_TRACE_MLIO_DETAIL=1

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/test/runtest
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/test/runtest b/core/sqf/monitor/test/runtest
index 20c06a9..5580d0a 100755
--- a/core/sqf/monitor/test/runtest
+++ b/core/sqf/monitor/test/runtest
@@ -169,13 +169,14 @@ shell <<eof
  delay 3
  exec {name \$CTRLR, nid 0, out $TRAF_HOME/monitor/test/childExit.lst} childExitCtrl $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
-shell -c ps 
+shell -nid 0 -c ps
+#shell -nid 0 -c ps;shell -nid 1 -c ps;shell -nid 2 -c ps;shell -nid 3 -c ps;shell -nid 4 -c ps;shell -nid 5 -c ps
+shell -nid 0 -c ps monitor;shell -nid 1 -c ps monitor;shell -nid 2 -c ps monitor;shell -nid 3 -c ps monitor;shell -nid 4 -c ps monitor;shell -nid 5 -c ps monitor
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -211,12 +212,11 @@ shell <<eof
  exec {pri 10,name \$CLIENT,nid 0, out $TRAF_HOME/monitor/test/multiNode.lst} client $trace
  delay 3
  !shutdown
- ps
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -243,13 +243,12 @@ shell <<eof
  delay 3
  exec {name \$CTRLR, nid 0, out $TRAF_HOME/monitor/test/regTest.lst} regTestCtrl $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -287,13 +286,12 @@ shell <<eof
  !
  exec {name \$DEATH, nid 0, out $TRAF_HOME/monitor/test/deathNotice.lst} deathNotice $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -324,13 +322,12 @@ shell <<eof
  delay 15
 exec {name \$PPROC, nid 0, out $TRAF_HOME/monitor/test/persistentProc.lst} persistentProc $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -350,13 +347,12 @@ shell <<eof
  delay 3
 exec {name \$PPROC, nid 0, out $TRAF_HOME/monitor/test/persistentProc.lst} persistentProc $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -382,13 +378,12 @@ shell <<eof
  delay 3
  exec {name \$DTMCTRL, nid 0, out $TRAF_HOME/monitor/test/dtmTest.lst} dtmCtrl $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -420,13 +415,12 @@ shell <<eof
  delay 3
  exec {name \$SPXCTRL, nid 0, out $TRAF_HOME/monitor/test/spxTest.lst} spxCtrl $trace
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -453,13 +447,12 @@ shell <<eof
  delay 3
 exec {name \$PCRE8, nid 0, out $TRAF_HOME/monitor/test/procCreate.lst} procCreate $trace -x
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -488,13 +481,12 @@ shell <<eof
  delay 3
  down 1 !
  delay 10
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -525,13 +517,12 @@ shell <<eof
 exec {nowait, nid 0, name \$CTRLR, out $TRAF_HOME/monitor/test/tmSync.lst} tmSyncCtrl -n 1,3,4,5,6 $trace
  wait $CTRLR
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -551,13 +542,12 @@ shell <<eof
 exec {nowait, nid 0, name \$CTRLR, out $TRAF_HOME/monitor/test/tmSync8.lst} tmSyncCtrl -n 8 $trace
  wait $CTRLR
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -579,13 +569,12 @@ shell <<eof
 exec {nowait, nid 0, name \$CTRLR, out $TRAF_HOME/monitor/test/tmSync10.lst} tmSyncCtrl -n 10 $trace
  wait $CTRLR
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit
@@ -606,13 +595,12 @@ shell <<eof
 exec {nowait, nid 0, name \$CTRLR, out $TRAF_HOME/monitor/test/tmSync.lst} tmSyncCtrl -n 3,4,5,6,7 $trace
  wait $CTRLR
  delay 3
- ps
  !shutdown
  exit
 eof
-if ( [ $test '==' -1 ] ); then
 shell -c ps 
 shell -c ps monitor
+if ( [ $test '==' -1 ] ); then
 shell -a<<eof
  shutdown
  exit

http://git-wip-us.apache.org/repos/asf/trafodion/blob/6dc990fe/core/sqf/monitor/test/sqconfig.monitor.virtual
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/test/sqconfig.monitor.virtual b/core/sqf/monitor/test/sqconfig.monitor.virtual
index 8f6750c..9ab56e8 100644
--- a/core/sqf/monitor/test/sqconfig.monitor.virtual
+++ b/core/sqf/monitor/test/sqconfig.monitor.virtual
@@ -24,6 +24,9 @@ _virtualnodes 6
 end node
 
 begin name-server
-nodes=0
+#nodes=0
+nodes=0,1
+#nodes=0,1,2
+#nodes=0,1,2,3
 #nodes=0,1,2,3,4,5
 end name-server


Mime
View raw message