hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From h...@apache.org
Subject [2/2] incubator-hawq git commit: Revert "HAWQ-1342. Fixed QE process hang in shared input scan on segment node"
Date Fri, 10 Mar 2017 07:59:48 GMT
Revert "HAWQ-1342. Fixed QE process hang in shared input scan on segment node"

   The fix introduced a hang regression in shared input scan queries, as described in HAWQ-1371.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/61780e99
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/61780e99
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/61780e99

Branch: refs/heads/master
Commit: 61780e999c508973e3dccd460e80f47853928277
Parents: 9d0ea46
Author: amyrazz44 <abai@pivotal.io>
Authored: Tue Mar 7 16:02:39 2017 +0800
Committer: Ruilong Huo <rhuo@pivotal.io>
Committed: Fri Mar 10 15:59:16 2017 +0800

----------------------------------------------------------------------
 src/backend/executor/nodeShareInputScan.c | 49 +++++++++++++++-----------
 1 file changed, 28 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/61780e99/src/backend/executor/nodeShareInputScan.c
----------------------------------------------------------------------
diff --git a/src/backend/executor/nodeShareInputScan.c b/src/backend/executor/nodeShareInputScan.c
index 74dbcb5..0f08848 100644
--- a/src/backend/executor/nodeShareInputScan.c
+++ b/src/backend/executor/nodeShareInputScan.c
@@ -40,6 +40,7 @@
 
 #include "postgres.h"
 
+#include "access/xact.h"
 #include "cdb/cdbvars.h"
 #include "executor/executor.h"
 #include "executor/nodeShareInputScan.h"
@@ -640,6 +641,10 @@ read_retry:
 		goto read_retry;
 	else
 	{
+		if(fd >= 0)
+		{
+			gp_retry_close(fd);
+		}
 		elog(ERROR, "could not read from fifo: %m");
 	}
 	Assert(!"Never be here");
@@ -659,6 +664,10 @@ write_retry:
 		goto write_retry;
 	else
 	{
+		if(fd >= 0)
+		{
+			gp_retry_close(fd);
+		}
 		elog(ERROR, "could not write to fifo: %m");
 	}
 
@@ -785,14 +794,7 @@ shareinput_reader_waitready(int share_id, PlanGenerator planGen)
 		{
 			int save_errno = errno;
 			elog(LOG, "SISC READER (shareid=%d, slice=%d): Wait ready try again, errno %d ... ",
-								share_id, currentSliceId, save_errno);
-			if(save_errno == EBADF)
-			{
-				/* The file description is invalid, maybe this FD has been already closed by writer in some cases
-				 * we need to break here to avoid endless loop and continue to run CHECK_FOR_INTERRUPTS.
-				 */
-				break;
-			}
+					share_id, currentSliceId, save_errno);
 		}
 	}
 	return (void *) pctxt;
@@ -923,12 +925,9 @@ writer_wait_for_acks(ShareInput_Lk_Context *pctxt, int share_id, int xslice)
 			int save_errno = errno;
 			elog(LOG, "SISC WRITER (shareid=%d, slice=%d): notify still wait for an answer, errno %d",
 					share_id, currentSliceId, save_errno);
-			if(save_errno == EBADF)
-			{
-				/* The file description is invalid, maybe this FD has been already closed by writer in some cases
-				 * we need to break here to avoid endless loop and continue to run CHECK_FOR_INTERRUPTS.
-				 */
-				break;
+			/*if error(except EINTR) happens in select, we just return to avoid endless loop*/
+			if(errno != EINTR){
+				return;
 			}
 		}
 	}
@@ -980,6 +979,21 @@ shareinput_writer_waitdone(void *ctxt, int share_id, int nsharer_xslice)
 	while(ack_needed > 0)
 	{
 		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Writer won't wait for data reading done notification from readers if transaction is
+		 * aborting. Readers may fail to send data reading done notification to writer in two
+		 * cases:
+		 *
+		 *    1. The transaction is aborted due to interrupts or exceptions, i.e., user cancels
+		 *       query, division by zero on some segment
+		 *
+		 *    2. Logic errors in reader which incur its unexpected exit, i.e., segmentation fault
+		 */
+		if (IsAbortInProgress())
+		{
+			break;
+		}
 	
 		MPP_FD_ZERO(&rset);
 		MPP_FD_SET(pctxt->donefd, &rset);
@@ -1010,13 +1024,6 @@ shareinput_writer_waitdone(void *ctxt, int share_id, int nsharer_xslice)
 			int save_errno = errno;
 			elog(LOG, "SISC WRITER (shareid=%d, slice=%d): wait done time out once, errno %d",
 					share_id, currentSliceId, save_errno);
-			if(save_errno == EBADF)
-			{
-				/* The file description is invalid, maybe this FD has been already closed by writer in some cases
-				 * we need to break here to avoid endless loop and continue to run CHECK_FOR_INTERRUPTS.
-				 */
-				break;
-			}
 		}
 	}
 


Mime
View raw message