Return-Path: X-Original-To: apmail-cloudstack-commits-archive@www.apache.org Delivered-To: apmail-cloudstack-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id AB437109C7 for ; Tue, 8 Apr 2014 12:27:58 +0000 (UTC) Received: (qmail 61023 invoked by uid 500); 8 Apr 2014 12:25:55 -0000 Delivered-To: apmail-cloudstack-commits-archive@cloudstack.apache.org Received: (qmail 60821 invoked by uid 500); 8 Apr 2014 12:25:51 -0000 Mailing-List: contact commits-help@cloudstack.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@cloudstack.apache.org Delivered-To: mailing list commits@cloudstack.apache.org Received: (qmail 56417 invoked by uid 99); 8 Apr 2014 12:25:19 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 08 Apr 2014 12:25:19 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id D3EE194DF22; Tue, 8 Apr 2014 12:25:16 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: talluri@apache.org To: commits@cloudstack.apache.org Date: Tue, 08 Apr 2014 12:25:36 -0000 Message-Id: <93548711f0414cc881fcdea4029d5d27@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [21/60] [abbrv] git commit: updated refs/heads/marvin to 0e223d6 BUG-ID: CS-19697: Deal with possible DB deadlock issue, return the job into queue for next turn Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/66486d43 Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/66486d43 Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/66486d43 Branch: refs/heads/marvin Commit: 66486d4322121c2a8903ab94db60fb894c5ad149 Parents: 66bece5 Author: Kelven Yang Authored: Mon Mar 24 17:50:27 2014 -0700 Committer: Kelven Yang Committed: Wed Apr 2 14:59:04 2014 -0700 ---------------------------------------------------------------------- .../framework/messagebus/MessageDetector.java | 4 ++- .../jobs/impl/AsyncJobManagerImpl.java | 38 ++++++++++++++++---- 2 files changed, 34 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cloudstack/blob/66486d43/framework/ipc/src/org/apache/cloudstack/framework/messagebus/MessageDetector.java ---------------------------------------------------------------------- diff --git a/framework/ipc/src/org/apache/cloudstack/framework/messagebus/MessageDetector.java b/framework/ipc/src/org/apache/cloudstack/framework/messagebus/MessageDetector.java index 3fb620c..fae9bf3 100644 --- a/framework/ipc/src/org/apache/cloudstack/framework/messagebus/MessageDetector.java +++ b/framework/ipc/src/org/apache/cloudstack/framework/messagebus/MessageDetector.java @@ -31,8 +31,10 @@ public class MessageDetector implements MessageSubscriber { } public boolean waitAny(long timeoutInMiliseconds) { - _signalled = false; synchronized (this) { + if (_signalled) + return true; + try { wait(timeoutInMiliseconds); } catch (InterruptedException e) { http://git-wip-us.apache.org/repos/asf/cloudstack/blob/66486d43/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java ---------------------------------------------------------------------- diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 4acfdbf..42148be 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -445,9 +445,9 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, CallContext.registerPlaceHolderContext(); if (job.getRelated() != null && !job.getRelated().isEmpty()) - NDC.push("Job-" + job.getRelated() + "/" + "Job-" + job.getId()); + NDC.push("job-" + job.getRelated() + "/" + "job-" + job.getId()); else - NDC.push("Job-" + job.getId()); + NDC.push("job-" + job.getId()); try { super.run(); } finally { @@ -560,17 +560,41 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, job.setSyncSource(item); - job.setExecutingMsid(getMsid()); - _jobDao.update(job.getId(), job); + // + // TODO: a temporary solution to work-around DB deadlock situation + // + // to live with DB deadlocks, we will give a chance for job to be rescheduled + // in case of exceptions (most-likely DB deadlock exceptions) + try { + job.setExecutingMsid(getMsid()); + _jobDao.update(job.getId(), job); + } catch (Exception e) { + s_logger.warn("Unexpected exception while dispatching job-" + item.getContentId(), e); + + try { + _queueMgr.returnItem(item.getId()); + } catch (Throwable thr) { + s_logger.error("Unexpected exception while returning job-" + item.getContentId() + " to queue", thr); + } + } try { scheduleExecution(job); } catch (RejectedExecutionException e) { s_logger.warn("Execution for job-" + job.getId() + " is rejected, return it to the queue for next turn"); - _queueMgr.returnItem(item.getId()); - job.setExecutingMsid(null); - _jobDao.update(job.getId(), job); + try { + _queueMgr.returnItem(item.getId()); + } catch (Exception e2) { + s_logger.error("Unexpected exception while returning job-" + item.getContentId() + " to queue", e2); + } + + try { + job.setExecutingMsid(null); + _jobDao.update(job.getId(), job); + } catch (Exception e3) { + s_logger.warn("Unexpected exception while update job-" + item.getContentId() + " msid for bookkeeping"); + } } } else {