hawq-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jiny2 <...@git.apache.org>
Subject [GitHub] incubator-hawq pull request: Hawq-473. Implement adding a entry in...
Date Fri, 11 Mar 2016 08:45:14 GMT
Github user jiny2 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/430#discussion_r55803099
  
    --- Diff: src/backend/resourcemanager/resourcepool.c ---
    @@ -901,37 +1019,58 @@ int addHAWQSegWithSegStat(SegStat segstat, bool *capstatchanged)
     	else {
     		segresource = getSegResource(segid);
     		Assert(segresource != NULL);
    +		uint32_t oldStatusDesc = segresource->Stat->StatusDesc;
     		uint8_t oldStatus = segresource->Stat->FTSAvailable;
    -		bool statusChanged = oldStatus != segstat->FTSAvailable;
     
     		/*
    -		 * Check if RM process is restarted in this segment.
     		 * If the latest reported RM process startup timestamp doesn't
     		 * match the previous, master RM consider segment's RM process
     		 * has restarted.
     		 * In rare case, the system's time is reset and segment's RM process
     		 * happen to get a same timestamp with previous one.
     		 */
    -		if (segresource->Stat->RMStartTimestamp != segstat->RMStartTimestamp)
    +		if (segresource->Stat->RMStartTimestamp != segstat->RMStartTimestamp &&
    +				(segresource->Stat->StatusDesc & SEG_STATUS_HEARTBEAT_TIMEOUT) == 0 &&
    +				(segresource->Stat->StatusDesc & SEG_STATUS_COMMUNICATION_ERROR) == 0 &&
    +				(segresource->Stat->StatusDesc & SEG_STATUS_RUALIVE_FAILED) == 0)
     		{
     			/*
    -			 * This segment's RM process has restarted,
    -			 * we should clean up old status, so mark it down.
    +			 * This segment's RM process has restarted.
    +			 * if StatusDesc doesn't have heartbeat timeout flag, or communication error,
    +			 * or RUAlive failed flag, this segment is set to DOWN.
    +			 * It will be set to UP when reports a new heartbeat.
     			 */
    -			if (oldStatus == RESOURCE_SEG_STATUS_AVAILABLE && !statusChanged)
    -			{
    -				segstat->FTSAvailable = RESOURCE_SEG_STATUS_UNAVAILABLE;
    -				statusChanged = true;
    -			}
    +			segresource->Stat->StatusDesc |= SEG_STATUS_RM_RESET;
     			segresource->Stat->RMStartTimestamp = segstat->RMStartTimestamp;
     			elog(LOG, "Master RM finds segment:%s 's RM process has restarted. "
    -					  "old status:%d, new status:%d",
    +					  "old status:%d",
     					  GET_SEGRESOURCE_HOSTNAME(segresource),
    -					  oldStatus,
    -					  segstat->FTSAvailable);
    +					  oldStatus);
    +		}
    +		else
    +		{
    +			if (segresource->Stat->RMStartTimestamp != segstat->RMStartTimestamp)
    +			{
    +				segresource->Stat->RMStartTimestamp = segstat->RMStartTimestamp;
    +			}
    +			/*
    +			 * Now clear heartbeat timeout flag, RM reset flag,
    +			 * RUAlive failed flag, communication error flag
    +			 * and RM reset flag in StatusDesc
    +			 */
    +			if ((segresource->Stat->StatusDesc & SEG_STATUS_HEARTBEAT_TIMEOUT) != 0)
    +				segresource->Stat->StatusDesc &= ~SEG_STATUS_HEARTBEAT_TIMEOUT;
    +			if ((segresource->Stat->StatusDesc & SEG_STATUS_RUALIVE_FAILED) != 0)
    +				segresource->Stat->StatusDesc &= ~SEG_STATUS_RUALIVE_FAILED;
    +			if ((segresource->Stat->StatusDesc & SEG_STATUS_COMMUNICATION_ERROR) != 0)
    +				segresource->Stat->StatusDesc &= ~SEG_STATUS_COMMUNICATION_ERROR;
    +			if ((segresource->Stat->StatusDesc & SEG_STATUS_RM_RESET) != 0)
    +				segresource->Stat->StatusDesc &= ~SEG_STATUS_RM_RESET;
    --- End diff --
    
    Suggest adding a debug log for status changes.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message