hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From y...@apache.org
Subject incubator-hawq git commit: HAWQ-81. Make resource manager to reject resource request when there are too many segments down
Date Thu, 29 Oct 2015 02:35:55 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/master 122e75dff -> d3242b458


HAWQ-81. Make resource manager to reject resource request when there are too many segments down


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/d3242b45
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/d3242b45
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/d3242b45

Branch: refs/heads/master
Commit: d3242b458b9d28902943dba0d9b9d9bd56c8b54a
Parents: 122e75d
Author: Yi Jin <yjin@pivotal.io>
Authored: Thu Oct 29 10:35:33 2015 +0800
Committer: Yi Jin <yjin@pivotal.io>
Committed: Thu Oct 29 10:35:33 2015 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbvars.c                             |  1 +
 .../resourcemanager/communication/rmcomm_QD2RM.c      |  5 ++---
 src/backend/resourcemanager/errorcode.c               |  1 +
 src/backend/resourcemanager/include/errorcode.h       |  1 +
 src/backend/resourcemanager/requesthandler.c          | 11 +++++++++++
 src/backend/utils/misc/guc.c                          | 14 +++++++++++++-
 src/include/cdb/cdbvars.h                             |  2 +-
 7 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/backend/cdb/cdbvars.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index 6b468e9..93b823f 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -339,6 +339,7 @@ int		rm_resource_heartbeat_interval; /* How many seconds to wait before sending
 										   another heart-beat to resource manager. */
 
 int		rm_tolerate_nseg_limit;
+int		rm_rejectrequest_nseg_limit;
 int		rm_nvseg_variance_among_seg_limit;
 
 char   *rm_resourcepool_test_filename;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/backend/resourcemanager/communication/rmcomm_QD2RM.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/communication/rmcomm_QD2RM.c b/src/backend/resourcemanager/communication/rmcomm_QD2RM.c
index 09d6da9..0cab819 100644
--- a/src/backend/resourcemanager/communication/rmcomm_QD2RM.c
+++ b/src/backend/resourcemanager/communication/rmcomm_QD2RM.c
@@ -649,8 +649,7 @@ int acquireResourceFromRM(int 		  		  index,
     if ( res != FUNC_RETURN_OK )
     {
     	snprintf(errorbuf, errorbufsize,
-    			 "failed to acquire resource from HAWQ resource manager because "
-    			 "of RPC error %s.",
+    			 "failed to acquire resource because of %s.",
 				 getErrorCodeExplain(res));
     	pgstat_report_waiting_resource(false);
     	return res;
@@ -662,7 +661,7 @@ int acquireResourceFromRM(int 		  		  index,
     if ( errres->Result != FUNC_RETURN_OK )
     {
     	snprintf(errorbuf, errorbufsize,
-    			 "failed to acquire resource because of remote error %s.",
+    			 "failed to acquire resource because of %s.",
     			 getErrorCodeExplain(errres->Result));
     	return errres->Result;
     }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/backend/resourcemanager/errorcode.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/errorcode.c b/src/backend/resourcemanager/errorcode.c
index 49c1a1e..1943940 100644
--- a/src/backend/resourcemanager/errorcode.c
+++ b/src/backend/resourcemanager/errorcode.c
@@ -14,6 +14,7 @@ ErrorDetailData ErrorDetailsPreset[] = {
 		{RESQUEMGR_TOO_MANY_FIXED_SEGNUM,		"expecting too many virtual segments"},
 		{REQUESTHANDLER_WRONG_CONNSTAT,			"that resource context maybe recycled due to timeout"},
 		{CONNTRACK_NO_CONNID,					"that resource context does not exist"},
+		{RESOURCEPOOL_TOO_MANY_UAVAILABLE_HOST, "too many unavailable segments"},
 
 		{-1, ""}
 };

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/backend/resourcemanager/include/errorcode.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/errorcode.h b/src/backend/resourcemanager/include/errorcode.h
index 8c86011..1e82073 100644
--- a/src/backend/resourcemanager/include/errorcode.h
+++ b/src/backend/resourcemanager/include/errorcode.h
@@ -113,6 +113,7 @@ enum DRM_ERROR_CODE {
 	RESOURCEPOOL_NO_GROUPID,
 	RESOURCEPOOL_DUPLICATE_HOST,
 	RESOURCEPOOL_UNRESOLVED_HOST,
+	RESOURCEPOOL_TOO_MANY_UAVAILABLE_HOST,
 
 	/*-----------------------------------------------------------------------*/
 	COMM2RM_CLIENT_START_TAG = 500,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/backend/resourcemanager/requesthandler.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/requesthandler.c b/src/backend/resourcemanager/requesthandler.c
index 55a3dcd..602dcc3 100644
--- a/src/backend/resourcemanager/requesthandler.c
+++ b/src/backend/resourcemanager/requesthandler.c
@@ -356,6 +356,17 @@ bool handleRMRequestAcquireResource(void **arg)
 		goto sendresponse;
 	}
 
+	/* Check if HAWQ has enough alive segments. */
+	int unavailcount = PRESPOOL->SlavesHostCount - PRESPOOL->AvailNodeCount;
+	if ( unavailcount >= rm_rejectrequest_nseg_limit )
+	{
+		elog(WARNING, "Resource manager finds %d segments not available yet, all "
+					  "resource allocation requests are rejected.",
+					  unavailcount);
+		res = RESOURCEPOOL_TOO_MANY_UAVAILABLE_HOST;
+		goto sendresponse;
+	}
+
 	/* Get scan size. */
 	request = (RPCRequestHeadAcquireResourceFromRM)
 			  ((*conntrack)->MessageBuff.Buffer);

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 6ce5c2f..a533cff 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -6494,7 +6494,9 @@ static struct config_int ConfigureNamesInt[] =
 
 	{
 		{"hawq_rm_tolerate_nseg_limit", PGC_POSTMASTER, RESOURCES_MGM,
-			gettext_noop("the size of down segments that resource manager should tolerate at most ."),
+			gettext_noop("resource manager re-allocates resource if the number of exclusive "
+						 "segments is greater than this limit value when there is at least "
+						 "one segment containing two or more virtual segments."),
 			NULL
 		},
 		&rm_tolerate_nseg_limit,
@@ -6502,6 +6504,16 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
+		{"hawq_rm_rejectrequest_nseg_limit", PGC_POSTMASTER, RESOURCES_MGM,
+			gettext_noop("resource manager rejects new resource request if the number of "
+						 "unavailable segments is greater than this limit value."),
+			NULL
+		},
+		&rm_rejectrequest_nseg_limit,
+		2, 0, 65535, NULL, NULL
+	},
+
+	{
 		{"hawq_rm_nvseg_variance_amon_seg_limit", PGC_POSTMASTER, RESOURCES_MGM,
 			gettext_noop("the variance of vseg number in each segment that resource manager should tolerate at most."),
 			NULL

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d3242b45/src/include/cdb/cdbvars.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 1d4f7c7..5e2ce36 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -1186,11 +1186,11 @@ extern int 	   rm_query_resource_noresource_timeout;
 extern int	   rm_resource_timeout;
 extern int	   rm_resource_heartbeat_interval;
 extern int	   rm_tolerate_nseg_limit;
+extern int	   rm_rejectrequest_nseg_limit;
 extern int	   rm_nvseg_variance_among_seg_limit;
 extern char   *rm_resourcepool_test_filename;
 extern bool	   rm_force_fifo_queue;
 
-
 extern bool	   rm_enforce_cpu_enable;
 extern bool    rm_enforce_blkio_enable;
 extern char   *rm_enforce_cgrp_mnt_pnt;


Mime
View raw message