hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From y...@apache.org
Subject incubator-hawq git commit: HAWQ-201. Resource uneven or fragment problem may cause resource queue goes into long term pause
Date Thu, 03 Dec 2015 03:44:59 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/master fb2fa9a97 -> dc12f127f


HAWQ-201. Resource uneven or fragment problem may cause resource queue goes into long term pause


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/dc12f127
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/dc12f127
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/dc12f127

Branch: refs/heads/master
Commit: dc12f127f20170f63e6d5ac0f0b3b08399f42412
Parents: fb2fa9a
Author: Yi Jin <yjin@pivotal.io>
Authored: Thu Dec 3 11:44:32 2015 +0800
Committer: Yi Jin <yjin@pivotal.io>
Committed: Thu Dec 3 11:44:32 2015 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbvars.c                       | 23 +++---
 .../resourcemanager/include/resqueuemanager.h   | 10 +++
 src/backend/resourcemanager/resourcemanager.c   | 36 +++++++++-
 src/backend/resourcemanager/resqueuemanager.c   | 74 +++++++++++++-------
 src/backend/utils/misc/guc.c                    | 10 +++
 src/include/cdb/cdbvars.h                       |  1 +
 6 files changed, 117 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/dc12f127/src/backend/cdb/cdbvars.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index b5c79b8..8a614db 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -344,15 +344,20 @@ int		rm_min_resource_perseg;
 bool	rm_force_fifo_queue;
 
 bool	rm_session_lease_heartbeat_enable;
-int     rm_session_lease_timeout; 	/* How many seconds to wait before expiring
-									   allocated resource. */
-int		rm_resource_allocation_timeout;	/* How may seconds to wait before
-										   expiring queuing query resource
-										   request. */
-int		rm_resource_timeout;		/* How many seconds to wait before returning
-									   resource back to the resource broker. */
-int		rm_session_lease_heartbeat_interval; /* How many seconds to wait before sending
-										   	    another heart-beat to resource manager. */
+int     rm_session_lease_timeout; 			/* How many seconds to wait before
+											   expiring allocated resource. */
+int		rm_resource_allocation_timeout;		/* How may seconds to wait before
+										   	   expiring queuing query resource
+										   	   request. */
+int		rm_resource_timeout;				/* How many seconds to wait before
+											   returning resource back to the
+											   resource broker. */
+int		rm_request_timeoutcheck_interval; 	/* How many seconds to wait before
+											   checking resource contexts for
+											   timeout. */
+int		rm_session_lease_heartbeat_interval;/* How many seconds to wait before
+											   sending another heart-beat to
+											   resource manager. */
 
 int		rm_tolerate_nseg_limit;
 int		rm_rejectrequest_nseg_limit;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/dc12f127/src/backend/resourcemanager/include/resqueuemanager.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/resqueuemanager.h b/src/backend/resourcemanager/include/resqueuemanager.h
index 714bc2b..819ba05 100644
--- a/src/backend/resourcemanager/include/resqueuemanager.h
+++ b/src/backend/resourcemanager/include/resqueuemanager.h
@@ -257,6 +257,15 @@ struct UserInfoData {
 typedef struct UserInfoData *UserInfo;
 typedef struct UserInfoData  UserInfoData;
 
+enum RESOURCEPROBLEM
+{
+	RESPROBLEM_NO = 0,
+	RESPROBLEM_FRAGMENT,
+	RESPROBLEM_UNEVEN,
+	RESPROBLEM_TOOFEWSEG,
+	RESPROBLEM_COUNT
+};
+
 /******************************************************************************
  * In resource queue manager,  a list of resource queues are saved. This can  *
  * be referenced from global instance DRMGlobalInstance->ResourceQueueManager.*
@@ -315,6 +324,7 @@ struct DynResourceQueueManagerData {
     bool					 GRMQueueResourceTight;
     int						 ForcedReturnGRMContainerCount;
     bool					 toRunQueryDispatch;
+    bool	 				 hasResourceProblem[RESPROBLEM_COUNT];
 };
 typedef struct DynResourceQueueManagerData *DynResourceQueueManager;
 typedef struct DynResourceQueueManagerData  DynResourceQueueManagerData;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/dc12f127/src/backend/resourcemanager/resourcemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcemanager.c b/src/backend/resourcemanager/resourcemanager.c
index ecdae5a..ff3748f 100644
--- a/src/backend/resourcemanager/resourcemanager.c
+++ b/src/backend/resourcemanager/resourcemanager.c
@@ -2020,13 +2020,13 @@ int  addResourceQueueAndUserFromProperties(List *queueprops, List *userprops)
 int generateAllocRequestToBroker(void)
 {
 	int res = FUNC_RETURN_OK;
-	/*
-	 *--------------------------------------------------------------------------
+	/*--------------------------------------------------------------------------
 	 * This is a temporary restrict that HAWQ RM supports only one memory/core
 	 * ratio in current version.
 	 *--------------------------------------------------------------------------
 	 */
 	Assert( PQUEMGR->RatioCount == 1 );
+
 	DynMemoryCoreRatioTrack mctrack = PQUEMGR->RatioTrackers[0];
 
 	bool hasWorkload = mctrack->TotalUsed.MemoryMB +
@@ -2068,6 +2068,7 @@ int generateAllocRequestToBroker(void)
 	List 	 *ressegl	 = NULL;
 	ListCell *cell		 = NULL;
 	getAllPAIRRefIntoList(&(PRESPOOL->Segments), &ressegl);
+
 	foreach(cell, ressegl)
 	{
 		PAIR pair = (PAIR)lfirst(cell);
@@ -2168,6 +2169,37 @@ int generateAllocRequestToBroker(void)
 
 	elog(RMLOG, "Resource manager now needs %d GRM containers.", reqcore);
 
+	/*
+	 * Check if should raise water level to deal with resource fragment or
+	 * resource uneven problems. We trigger this logic only when no resource
+	 * request caused by lack of resource, and no pending resource are waited
+	 * for.
+	 */
+	if ( reqcore <= 0 &&
+		 mctrack->TotalPending.Core <= 0 &&
+		 (PQUEMGR->hasResourceProblem[RESPROBLEM_FRAGMENT] ||
+		  PQUEMGR->hasResourceProblem[RESPROBLEM_UNEVEN]   ||
+		  PQUEMGR->hasResourceProblem[RESPROBLEM_TOOFEWSEG]) )
+	{
+		/* Check if it is possible to raise water level. */
+		if ( mctrack->TotalAllocated.Core + 1 <=
+			 PRESPOOL->GRMTotal.Core * PQUEMGR->GRMQueueMaxCapacity )
+		{
+			/*
+			 * We only add one more GRM container to acquire, this will trigger
+			 * the following logic to raise the water level.
+			 */
+			reqcore = 1;
+			reqmem = reqcore * mctrack->MemCoreRatio;
+
+			PQUEMGR->hasResourceProblem[RESPROBLEM_FRAGMENT]  = false;
+			PQUEMGR->hasResourceProblem[RESPROBLEM_UNEVEN]    = false;
+			PQUEMGR->hasResourceProblem[RESPROBLEM_TOOFEWSEG] = false;
+
+			elog(LOG, "Resource manager raises segment resource water level.");
+		}
+	}
+
 	/* Call resource broker to request resource. */
 	if ( reqmem > 0 && reqcore > 0 )
 	{

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/dc12f127/src/backend/resourcemanager/resqueuemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resqueuemanager.c b/src/backend/resourcemanager/resqueuemanager.c
index a56f9b5..c5ac176 100644
--- a/src/backend/resourcemanager/resqueuemanager.c
+++ b/src/backend/resourcemanager/resqueuemanager.c
@@ -166,7 +166,7 @@ void markMemoryCoreRatioWaterMark(DQueue 		marks,
 void buildTimeoutResponseForQueuedRequest(ConnectionTrack conntrack,
 										  uint32_t 		  reason);
 
-bool isResourceAcceptable(ConnectionTrack conn, int segnumact);
+enum RESOURCEPROBLEM isResourceAcceptable(ConnectionTrack conn, int segnumact);
 
 void adjustResourceExpectsByQueueNVSegLimits(ConnectionTrack conntrack);
 /*----------------------------------------------------------------------------*/
@@ -250,6 +250,11 @@ void initializeResourceQueueManager(void)
     PQUEMGR->GRMQueueCurCapacity			= 0.0;
     PQUEMGR->GRMQueueResourceTight			= false;
     PQUEMGR->toRunQueryDispatch 			= false;
+
+    for ( int i = 0 ; i < RESPROBLEM_COUNT ; ++i )
+    {
+    	PQUEMGR->hasResourceProblem[i] = false;
+    }
 }
 
 /*
@@ -4077,7 +4082,8 @@ int dispatchResourceToQueries_EVEN(DynResourceQueueTrack track)
 										 &segnumact,
 										 &(conn->SegIOBytes));
 
-		if ( isResourceAcceptable(conn, segnumact) )
+		enum RESOURCEPROBLEM accepted = isResourceAcceptable(conn, segnumact);
+		if ( accepted == RESPROBLEM_NO )
 		{
 			elog(DEBUG3, "Resource manager dispatched %d segment(s) to connection %d",
 						 segnumact,
@@ -4108,6 +4114,7 @@ int dispatchResourceToQueries_EVEN(DynResourceQueueTrack track)
 		}
 		else
 		{
+			PQUEMGR->hasResourceProblem[accepted] = true;
 			/*
 			 * In case we have 0 segments allocated. This may occur because we
 			 * have too many resource small pieces. In this case, we treat the
@@ -4131,16 +4138,16 @@ int dispatchResourceToQueries_EVEN(DynResourceQueueTrack track)
 			}
 
 			/* Mark the request has resource fragment problem. */
-			if ( !conn->troubledByFragment )
+			if ( !conn->troubledByFragment && accepted == RESPROBLEM_FRAGMENT )
 			{
 				conn->troubledByFragmentTimestamp = gettime_microsec();
 				conn->troubledByFragment 		  = true;
-			}
 
-			elog(LOG, "Resource fragment problem is probably encountered. "
-					  "Session "INT64_FORMAT" expects minimum %d virtual segments.",
-					  conn->SessionID,
-					  conn->SegNumMin);
+				elog(LOG, "Resource fragment problem is probably encountered. "
+						  "Session "INT64_FORMAT" expects minimum %d virtual segments.",
+						  conn->SessionID,
+						  conn->SegNumMin);
+			}
 
 			/* Decide whether continue to process next query request. */
 			if ( rm_force_fifo_queue )
@@ -4165,12 +4172,17 @@ int dispatchResourceToQueries_EVEN(DynResourceQueueTrack track)
 	return FUNC_RETURN_OK;
 }
 
-bool isResourceAcceptable(ConnectionTrack conn, int segnumact)
+enum RESOURCEPROBLEM isResourceAcceptable(ConnectionTrack conn, int segnumact)
 {
-	/* Enough number of vsegments. */
+	/*--------------------------------------------------------------------------
+	 * Enough number of vsegments. If resource queue has enough quota, but
+	 * resource pool does not provide enough virtual segments allocated, we
+	 * consider this a resource fragment problem.
+	 *--------------------------------------------------------------------------
+	 */
 	if ( segnumact < conn->SegNumMin )
 	{
-		return false;
+		return RESPROBLEM_FRAGMENT;
 	}
 
 	/*
@@ -4191,7 +4203,7 @@ bool isResourceAcceptable(ConnectionTrack conn, int segnumact)
 						  list_length(conn->Resource),
 						  PRESPOOL->SlavesHostCount,
 						  rm_tolerate_nseg_limit);
-			return false;
+			return RESPROBLEM_TOOFEWSEG;
 		}
 	}
 
@@ -4219,10 +4231,10 @@ bool isResourceAcceptable(ConnectionTrack conn, int segnumact)
 						  "minimum virtual segment size is %d.",
 						  maxval,
 						  minval);
-			return false;
+			return RESPROBLEM_UNEVEN;
 		}
 	}
-	return true;
+	return RESPROBLEM_NO;
 }
 
 int dispatchResourceToQueries_FIFO(DynResourceQueueTrack track)
@@ -4396,7 +4408,9 @@ void timeoutDeadResourceAllocation(void)
 {
 	uint64_t curmsec = gettime_microsec();
 
-	if ( curmsec - PQUEMGR->LastCheckingDeadAllocationTime < 1000000L * 5 ) {
+	if ( curmsec - PQUEMGR->LastCheckingDeadAllocationTime <
+		 1000000LL * rm_request_timeoutcheck_interval )
+	{
 		return;
 	}
 
@@ -4484,7 +4498,9 @@ void timeoutQueuedRequest(void)
 {
 	uint64_t curmsec = gettime_microsec();
 
-	if ( curmsec - PQUEMGR->LastCheckingQueuedTimeoutTime < 1000000L * 5 ) {
+	if ( curmsec - PQUEMGR->LastCheckingQueuedTimeoutTime <
+		 1000000LL * rm_request_timeoutcheck_interval )
+	{
 		return;
 	}
 
@@ -4499,7 +4515,7 @@ void timeoutQueuedRequest(void)
 		PCONTRACK->ConnHavingRequests = list_delete_first(PCONTRACK->ConnHavingRequests);
 
 		/*
-		 * Case 1. RM has no workable cluster built yet, the request is not
+		 * Case 1. RM has no available cluster built yet, the request is not
 		 * 		   added into resource queue manager queues.
 		 */
 		elog(DEBUG3, "Deferred connection track is found. "
@@ -4538,7 +4554,6 @@ void timeoutQueuedRequest(void)
 	ListCell *cell	  = NULL;
 
 	getAllPAIRRefIntoList(&(PCONTRACK->Connections), &allcons);
-
 	foreach(cell, allcons)
 	{
 		ConnectionTrack curcon = (ConnectionTrack)(((PAIR)lfirst(cell))->Value);
@@ -4549,19 +4564,19 @@ void timeoutQueuedRequest(void)
 			 * Check if corresponding mem core ratio tracker has long enough
 			 * time to waiting for GRM containers.
 			 */
-			DynResourceQueueTrack queuetrack = (DynResourceQueueTrack)(curcon->QueueTrack);
+			DynResourceQueueTrack queuetrack = (DynResourceQueueTrack)
+											   (curcon->QueueTrack);
 			int index = getResourceQueueRatioIndex(queuetrack->MemCoreRatio);
-			/* Case 1. No available cluster information yet. We check only top
-			 * 		   query waiting time and resource request time.
-			 * Case 2. We have available cluster information, we check the
-			 * 		   resource increase pending time and top query waiting time.
-			 */
 			Assert(PQUEMGR->RootTrack != NULL);
 
-			/* Check if this is a head request in the queue. */
+			/*
+			 * Set the head waiting timestamp if this request is a head request
+			 * in the target queue.
+			 */
 			if ( queuetrack->QueryResRequests.NodeCount > 0 )
 			{
-				ConnectionTrack topwaiter = getDQueueHeadNodeData(&(queuetrack->QueryResRequests));
+				ConnectionTrack topwaiter =
+						getDQueueHeadNodeData(&(queuetrack->QueryResRequests));
 				if ( topwaiter == curcon && topwaiter->HeadQueueTime == 0 )
 				{
 					topwaiter->HeadQueueTime = gettime_microsec();
@@ -4578,6 +4593,13 @@ void timeoutQueuedRequest(void)
 
 			bool tocancel = false;
 
+			/*
+			 * Case 1. No available cluster for executing.
+			 *
+			 * Case 2. No enough resource to run, resource manager is still
+			 * 		   acquiring resource from global resource manager, and the
+			 * 		   request is at the head of the queue.
+			 */
 			if ( ( (PQUEMGR->RootTrack->ClusterSegNumberMax == 0) &&
 				   (curmsec - curcon->ResRequestTime >
 						1000000L * rm_resource_allocation_timeout ) ) ||

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/dc12f127/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1761a96..df56ae3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -6538,6 +6538,16 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
+		{"hawq_rm_request_timeoutcheck_interval", PGC_POSTMASTER, RESOURCES_MGM,
+			gettext_noop("interval for checking whether some resource contexts "
+						 "should be timed out."),
+			NULL
+		},
+		&rm_request_timeoutcheck_interval,
+		1, 1, 65535, NULL, NULL
+	},
+
+	{
 		{"hawq_rm_tolerate_nseg_limit", PGC_POSTMASTER, RESOURCES_MGM,
 			gettext_noop("resource manager re-allocates resource if the number of exclusive "
 						 "segments is greater than this limit value when there is at least "

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/dc12f127/src/include/cdb/cdbvars.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index efe6815..00206dd 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -1204,6 +1204,7 @@ extern bool    rm_session_lease_heartbeat_enable;
 
 extern int 	   rm_resource_allocation_timeout;
 extern int	   rm_resource_timeout;
+extern int	   rm_request_timeoutcheck_interval;
 extern int	   rm_session_lease_heartbeat_interval;
 extern int	   rm_tolerate_nseg_limit;
 extern int	   rm_rejectrequest_nseg_limit;


Mime
View raw message