hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From y...@apache.org
Subject incubator-hawq git commit: HAWQ-671. Validation raises error when adding accepted containers into a down node and fix function typo
Date Thu, 14 Apr 2016 06:19:56 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/master 9452055bc -> a60a54e5d


HAWQ-671. Validation raises error when adding accepted containers into a down node and fix function typo


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/a60a54e5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/a60a54e5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/a60a54e5

Branch: refs/heads/master
Commit: a60a54e5dbaa5fb7d0d3299816b80fcb658ac94d
Parents: 9452055
Author: YI JIN <yjin@pivotal.io>
Authored: Thu Apr 14 16:19:15 2016 +1000
Committer: YI JIN <yjin@pivotal.io>
Committed: Thu Apr 14 16:19:15 2016 +1000

----------------------------------------------------------------------
 .../communication/rmcomm_RM2RMSEG.c             |  3 +-
 .../resourcemanager/include/resqueuemanager.h   |  2 +-
 src/backend/resourcemanager/resourcemanager.c   |  4 --
 src/backend/resourcemanager/resourcepool.c      | 28 ++++----
 src/backend/resourcemanager/resqueuemanager.c   | 67 ++++++++++++--------
 5 files changed, 62 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a60a54e5/src/backend/resourcemanager/communication/rmcomm_RM2RMSEG.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/communication/rmcomm_RM2RMSEG.c b/src/backend/resourcemanager/communication/rmcomm_RM2RMSEG.c
index 95bc194..44addcf 100644
--- a/src/backend/resourcemanager/communication/rmcomm_RM2RMSEG.c
+++ b/src/backend/resourcemanager/communication/rmcomm_RM2RMSEG.c
@@ -216,7 +216,8 @@ void receivedRUAliveResponse(AsyncCommMessageHandlerContext  context,
 	}
 	else
 	{
-		elog(DEBUG3, "Resource manager finds host %s still up.");
+		elog(DEBUG3, "Resource manager finds host %s still up.",
+					 GET_SEGRESOURCE_HOSTNAME(segres));
 	}
 
 	setSegResRUAlivePending(segres, false);

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a60a54e5/src/backend/resourcemanager/include/resqueuemanager.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/resqueuemanager.h b/src/backend/resourcemanager/include/resqueuemanager.h
index 5ebf539..946a859 100644
--- a/src/backend/resourcemanager/include/resqueuemanager.h
+++ b/src/backend/resourcemanager/include/resqueuemanager.h
@@ -430,7 +430,7 @@ UserInfo getUserByUserName( const char *userid, int useridlen, bool *exist);
 UserInfo getUserByUserOID ( int64_t useroid, bool *exist);
 
 int addNewResourceToResourceManager(int32_t memorymb, double core);
-int minusResourceFromReourceManager(int32_t memorymb, double core);
+int minusResourceFromResourceManager(int32_t memorymb, double core);
 int addNewResourceToResourceManagerByBundle(ResourceBundle bundle);
 int minusResourceFromResourceManagerByBundle(ResourceBundle bundle);
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a60a54e5/src/backend/resourcemanager/resourcemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcemanager.c b/src/backend/resourcemanager/resourcemanager.c
index d744900..b3b10a2 100644
--- a/src/backend/resourcemanager/resourcemanager.c
+++ b/src/backend/resourcemanager/resourcemanager.c
@@ -2614,10 +2614,6 @@ void setAllNodesGRMDown()
 			 * trackers' capacity.
 			 */
 			setSegResHAWQAvailability(node, RESOURCE_SEG_STATUS_UNAVAILABLE);
-			/*
-			 * This call makes resource pool remove unused containers.
-			 */
-			returnAllGRMResourceFromSegment(node);
 			changedstatus = true;
 		}
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a60a54e5/src/backend/resourcemanager/resourcepool.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcepool.c b/src/backend/resourcemanager/resourcepool.c
index f6beee0..309ea9a 100644
--- a/src/backend/resourcemanager/resourcepool.c
+++ b/src/backend/resourcemanager/resourcepool.c
@@ -3210,8 +3210,6 @@ void returnAllGRMResourceFromSegment(SegResource segres)
 	GRMContainerSet ctns  = NULL;
 	uint32_t 		count = 0;
 
-	minusResourceFromResourceManagerByBundle(&(segres->Allocated));
-
 	for ( int i = 0 ; i < PQUEMGR->RatioCount ; ++i )
 	{
 		ctns = segres->ContainerSets[i];
@@ -3462,6 +3460,15 @@ void moveAllAcceptedGRMContainersToResPool(void)
 		addNewResourceToResourceManager(ctn->MemoryMB, ctn->Core);
 		removePendingResourceRequestInRootQueue(ctn->MemoryMB, ctn->Core, true);
 
+		if ( !IS_SEGSTAT_FTSAVAILABLE(ctn->Resource->Stat) )
+		{
+			elog(WARNING, "The container accepted by resource manager belongs to "
+						  "a down segment %s. Drop this container at once.",
+						  GET_SEGRESOURCE_HOSTNAME(ctn->Resource));
+			minusResourceFromResourceManager(ctn->MemoryMB, ctn->Core);
+			returnAllGRMResourceFromSegment(ctn->Resource);
+		}
+
 		counter++;
 		if ( counter >= rm_container_batch_limit )
 		{
@@ -3556,8 +3563,8 @@ void timeoutIdleGRMResourceToRB(void)
 			if ( realretcontnum > 0 )
 			{
 				/* Notify resource queue manager to minus allocated resource.*/
-				minusResourceFromReourceManager(realretcontnum * ratio,
-												realretcontnum * 1);
+				minusResourceFromResourceManager(realretcontnum * ratio,
+												 realretcontnum * 1);
 
 				elog(LOG, "Resource manager chose %d resource containers to "
 						  "return actually.",
@@ -3582,8 +3589,8 @@ void forceReturnGRMResourceToRB(void)
 	if ( realretcontnum > 0 )
 	{
 		/* Notify resource queue manager to minus allocated resource.*/
-		minusResourceFromReourceManager(realretcontnum * PQUEMGR->RatioReverseIndex[0],
-										realretcontnum * 1);
+		minusResourceFromResourceManager(realretcontnum * PQUEMGR->RatioReverseIndex[0],
+										 realretcontnum * 1);
 
 		elog(LOG, "Resource manager forced %d resource containers to "
 				  "return actually.",
@@ -3911,7 +3918,7 @@ void checkGRMContainerStatus(RB_GRMContainerStat ctnstats, int size)
 								  "it is not tracked by YARN" :
 								  "it is not treated active in YARN");
 
-					minusResourceFromReourceManager(ctn->MemoryMB, ctn->Core);
+					minusResourceFromResourceManager(ctn->MemoryMB, ctn->Core);
 
 					segreschanged = true;
 
@@ -3952,9 +3959,6 @@ void dropAllResPoolGRMContainersToToBeKicked(void)
 	for(uint32_t idx = 0; idx < PRESPOOL->SegmentIDCounter; idx++)
 	{
 	    SegResource segres = getSegResource(idx);
-
-	    /* Minus resource from resource queue resource quota. */
-	    minusResourceFromResourceManagerByBundle(&(segres->Allocated));
 		/* This call makes resource pool remove unused containers. */
 		dropAllGRMContainersFromSegment(segres);
 	}
@@ -4117,6 +4121,8 @@ void validateResourcePoolStatus(bool refquemgr)
 		return;
 	}
 
+	elog(RMLOG, "Validation original size (%d MB, %d CORE)", mem, core);
+
 	/*
 	 * If we use global resource manager to manage resource, the total capacity
 	 * might not follow the cluster memory to core ratio.
@@ -4126,7 +4132,7 @@ void validateResourcePoolStatus(bool refquemgr)
 	if ( totalallocmem > mem || totalalloccore > core )
 	{
 		elog(WARNING, "HAWQ RM Validation. Allocated too much resource in resource "
-					  "pool (%d MB, %lf CORE), maximum capacity (%d MB, %lf CORE)",
+					  "pool (%d MB, %lf CORE), maximum capacity (%d MB, %d CORE)",
 					  totalallocmem,
 					  totalalloccore,
 					  core,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a60a54e5/src/backend/resourcemanager/resqueuemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resqueuemanager.c b/src/backend/resourcemanager/resqueuemanager.c
index 98e9186..d8b62c0 100644
--- a/src/backend/resourcemanager/resqueuemanager.c
+++ b/src/backend/resourcemanager/resqueuemanager.c
@@ -3833,6 +3833,10 @@ int addNewResourceToResourceManagerByBundle(ResourceBundle bundle)
 
 int addNewResourceToResourceManager(int32_t memorymb, double core)
 {
+	elog(RMLOG, "addNewResourceToResourceManager input (%d MB, %lf CORE)",
+			    memorymb,
+				core);
+
 	if ( memorymb == 0 && core == 0 ) {
 		return FUNC_RETURN_OK;
 	}
@@ -3844,19 +3848,21 @@ int addNewResourceToResourceManager(int32_t memorymb, double core)
 	int32_t  ratioindex = getResourceQueueRatioIndex(ratio);
 	Assert( ratioindex >= 0 );
 
-	if ( ratioindex >= 0 ) {
-		addResourceBundleData(&(PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated),
-						  	  memorymb,
-							  core);
-	}
-	else {
-		elog(LOG, "To add resource (%d MB, %lf CORE), resource manager gets "
-				  "ratio %u not tracked.",
-				  memorymb,
-				  core,
-				  ratio);
-		return RESQUEMGR_NO_RATIO;
-	}
+	elog(RMLOG, "addNewResourceToResourceManager (%d MB, %lf CORE) "
+			    "plus (%d MB, %lf CORE)",
+				PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.MemoryMB,
+				PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.Core,
+				memorymb,
+				core);
+
+	addResourceBundleData(&(PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated),
+						  memorymb,
+						  core);
+
+	elog(RMLOG, "addNewResourceToResourceManager leavs (%d MB, %lf CORE)",
+			  	PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.MemoryMB,
+				PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.Core);
+
 
 	/* New resource is added. Try to dispatch resource to queries. */
 	PQUEMGR->toRunQueryDispatch = true;
@@ -3865,13 +3871,19 @@ int addNewResourceToResourceManager(int32_t memorymb, double core)
 
 int minusResourceFromResourceManagerByBundle(ResourceBundle bundle)
 {
-	return minusResourceFromReourceManager(bundle->MemoryMB, bundle->Core);
+	return minusResourceFromResourceManager(bundle->MemoryMB, bundle->Core);
 }
 
-int minusResourceFromReourceManager(int32_t memorymb, double core)
+int minusResourceFromResourceManager(int32_t memorymb, double core)
 {
+	elog(RMLOG, "minusResourceFromResourceManager input (%d MB, %lf CORE)",
+			    memorymb,
+				core);
+
 	if ( memorymb == 0 && core ==0 )
+	{
 		return FUNC_RETURN_OK;
+	}
 
 	/* Expect integer cores to add. */
 	Assert( trunc(core) == core );
@@ -3879,16 +3891,21 @@ int minusResourceFromReourceManager(int32_t memorymb, double core)
 	int32_t  ratioindex = getResourceQueueRatioIndex(ratio);
 	Assert( ratioindex >= 0 );
 
-	if ( ratioindex >= 0 ) {
-		minusResourceBundleData(&(PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated),
-						  	  	memorymb,
-						  	  	core);
-	}
-	else {
-		elog(WARNING, "HAWQ RM :: minusResourceFromReourceManager: "
-					  "Wrong ratio %u not tracked.", ratio);
-		return RESQUEMGR_NO_RATIO;
-	}
+	elog(RMLOG, "minusResourceFromResourceManager (%d MB, %lf CORE) "
+			    "minus (%d MB, %lf CORE)",
+				PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.MemoryMB,
+				PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.Core,
+				memorymb,
+				core);
+
+	minusResourceBundleData(&(PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated),
+						  	memorymb,
+						  	core);
+
+	elog(RMLOG, "minusResourceFromResourceManager leavs (%d MB, %lf CORE)",
+			    PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.MemoryMB,
+				PQUEMGR->RatioTrackers[ratioindex]->TotalAllocated.Core);
+
 	return FUNC_RETURN_OK;
 }
 


Mime
View raw message