Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 1384B2004A0 for ; Wed, 16 Aug 2017 09:30:11 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 1204016830A; Wed, 16 Aug 2017 07:30:11 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 5905A16830D for ; Wed, 16 Aug 2017 09:30:10 +0200 (CEST) Received: (qmail 56103 invoked by uid 500); 16 Aug 2017 07:30:09 -0000 Mailing-List: contact commits-help@asterixdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@asterixdb.apache.org Delivered-To: mailing list commits@asterixdb.apache.org Received: (qmail 56094 invoked by uid 99); 16 Aug 2017 07:30:09 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 16 Aug 2017 07:30:09 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 70C52DFF9F; Wed, 16 Aug 2017 07:30:09 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mhubail@apache.org To: commits@asterixdb.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: asterixdb git commit: [ASTERIXDB-2042][CLUS] Shutdown NC on Startup Completion Failure Date: Wed, 16 Aug 2017 07:30:09 +0000 (UTC) archived-at: Wed, 16 Aug 2017 07:30:11 -0000 Repository: asterixdb Updated Branches: refs/heads/master 98de3eb14 -> 05a5a376b [ASTERIXDB-2042][CLUS] Shutdown NC on Startup Completion Failure - user model changes: no - storage format changes: no - interface changes: no Details: - Report runtime exceptions of NC startup completion to CC. - Shutdown NC after reporting startup completion failure. Change-Id: I6c2ff0130e5e3e35ccf42a66d6855e568dce1fbe Reviewed-on: https://asterix-gerrit.ics.uci.edu/1943 Sonar-Qube: Jenkins Tested-by: Jenkins Integration-Tests: Michael Blow Integration-Tests: Jenkins Reviewed-by: abdullah alamoudi Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/05a5a376 Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/05a5a376 Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/05a5a376 Branch: refs/heads/master Commit: 05a5a376be7d97066c61f97ffb27e05db70c40f8 Parents: 98de3eb Author: Murtadha Hubail Authored: Tue Aug 15 20:44:17 2017 +0300 Committer: Murtadha Hubail Committed: Wed Aug 16 00:29:41 2017 -0700 ---------------------------------------------------------------------- .../message/NCLifecycleTaskReportMessage.java | 6 ++-- .../message/StartupTaskResponseMessage.java | 35 ++++++++++++-------- .../hyracks/control/nc/NCShutdownHook.java | 2 ++ 3 files changed, 26 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/asterixdb/blob/05a5a376/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java index a01d70a..2b32e1f 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/NCLifecycleTaskReportMessage.java @@ -28,7 +28,7 @@ public class NCLifecycleTaskReportMessage implements INCLifecycleMessage, ICcAdd private static final long serialVersionUID = 1L; private final String nodeId; private final boolean success; - private Exception exception; + private Throwable exception; public NCLifecycleTaskReportMessage(String nodeId, boolean success) { this.nodeId = nodeId; @@ -48,11 +48,11 @@ public class NCLifecycleTaskReportMessage implements INCLifecycleMessage, ICcAdd return success; } - public Exception getException() { + public Throwable getException() { return exception; } - public void setException(Exception exception) { + public void setException(Throwable exception) { this.exception = exception; } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/05a5a376/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java index aaf3eb8..1611507 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/replication/message/StartupTaskResponseMessage.java @@ -29,6 +29,7 @@ import org.apache.asterix.common.messaging.api.INcAddressedMessage; import org.apache.asterix.common.replication.INCLifecycleMessage; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.service.IControllerService; +import org.apache.hyracks.control.nc.NCShutdownHook; public class StartupTaskResponseMessage implements INCLifecycleMessage, INcAddressedMessage { @@ -47,22 +48,28 @@ public class StartupTaskResponseMessage implements INCLifecycleMessage, INcAddre INCMessageBroker broker = (INCMessageBroker) appCtx.getServiceContext().getMessageBroker(); IControllerService cs = appCtx.getServiceContext().getControllerService(); boolean success = true; - HyracksDataException exception = null; try { - for (INCLifecycleTask task : tasks) { - task.perform(cs); + Throwable exception = null; + try { + for (INCLifecycleTask task : tasks) { + task.perform(cs); + } + } catch (Throwable e) { //NOSONAR all startup failures should be reported to CC + success = false; + exception = e; + } + NCLifecycleTaskReportMessage result = new NCLifecycleTaskReportMessage(nodeId, success); + result.setException(exception); + try { + broker.sendMessageToCC(result); + } catch (Exception e) { + LOGGER.log(Level.SEVERE, "Failed sending message to cc", e); + } + } finally { + if (!success) { + // stop NC so that it can be started again + Runtime.getRuntime().exit(NCShutdownHook.FAILED_TO_STARTUP_EXIT_CODE); //NOSONAR startup failed } - } catch (HyracksDataException e) { - success = false; - exception = e; - } - NCLifecycleTaskReportMessage result = new NCLifecycleTaskReportMessage(nodeId, success); - result.setException(exception); - try { - broker.sendMessageToCC(result); - } catch (Exception e) { - LOGGER.log(Level.SEVERE, "Failed sending message to cc", e); - throw HyracksDataException.create(e); } } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/05a5a376/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java index 4d0c159..162d912 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-nc/src/main/java/org/apache/hyracks/control/nc/NCShutdownHook.java @@ -27,6 +27,8 @@ import java.util.logging.Logger; * operation is hanging for any reason */ public class NCShutdownHook extends Thread { + + public static final int FAILED_TO_STARTUP_EXIT_CODE = 2; private static final Logger LOGGER = Logger.getLogger(NCShutdownHook.class.getName()); private static final long SHUTDOWN_WAIT_TIME = 10 * 60 * 1000L; private final Thread watchDog;