Return-Path: X-Original-To: apmail-hawq-dev-archive@minotaur.apache.org Delivered-To: apmail-hawq-dev-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 03E1C175E0 for ; Thu, 29 Oct 2015 09:09:34 +0000 (UTC) Received: (qmail 15216 invoked by uid 500); 29 Oct 2015 09:09:33 -0000 Delivered-To: apmail-hawq-dev-archive@hawq.apache.org Received: (qmail 15165 invoked by uid 500); 29 Oct 2015 09:09:33 -0000 Mailing-List: contact dev-help@hawq.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hawq.incubator.apache.org Delivered-To: mailing list dev@hawq.incubator.apache.org Received: (qmail 15154 invoked by uid 99); 29 Oct 2015 09:09:33 -0000 Received: from Unknown (HELO spamd3-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 29 Oct 2015 09:09:33 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd3-us-west.apache.org (ASF Mail Server at spamd3-us-west.apache.org) with ESMTP id 00591180A43 for ; Thu, 29 Oct 2015 09:09:33 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd3-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 0.971 X-Spam-Level: X-Spam-Status: No, score=0.971 tagged_above=-999 required=6.31 tests=[KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, T_RP_MATCHES_RCVD=-0.01, URIBL_BLOCKED=0.001] autolearn=disabled Received: from mx1-us-east.apache.org ([10.40.0.8]) by localhost (spamd3-us-west.apache.org [10.40.0.10]) (amavisd-new, port 10024) with ESMTP id i1LkLzLXIL5Y for ; Thu, 29 Oct 2015 09:09:26 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-us-east.apache.org (ASF Mail Server at mx1-us-east.apache.org) with SMTP id 9823D43AC8 for ; Thu, 29 Oct 2015 09:09:25 +0000 (UTC) Received: (qmail 14735 invoked by uid 99); 29 Oct 2015 09:09:25 -0000 Received: from git1-us-west.apache.org (HELO 
git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 29 Oct 2015 09:09:25 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1EAD2E1091; Thu, 29 Oct 2015 09:09:25 +0000 (UTC) From: jiny2 To: dev@hawq.incubator.apache.org Reply-To: dev@hawq.incubator.apache.org References: In-Reply-To: Subject: [GitHub] incubator-hawq pull request: add check after submit Hawq AM to Yar... Content-Type: text/plain Message-Id: <20151029090925.1EAD2E1091@git1-us-west.apache.org> Date: Thu, 29 Oct 2015 09:09:25 +0000 (UTC) Github user jiny2 commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/58#discussion_r43362669 --- Diff: src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN_proc.c --- @@ -1276,20 +1277,43 @@ int RB2YARN_registerYARNApplication(void) elog(WARNING, "YARN mode resource broker failed to create application " "in YARN resource manager. %s", getErrorMessage()); + return yarnres; } - else { - elog(LOG, "YARN mode resource broker created job in YARN resource " - "manager %s as new application %s assigned to queue %s.", - YARNJobID, - YARNAppName.Str, - YARNQueueName.Str); - ResBrokerStartTime = gettime_microsec(); + elog(LOG, "YARN mode resource broker created job in YARN resource " + "manager %s as new application %s assigned to queue %s.", + YARNJobID, + YARNAppName.Str, + YARNQueueName.Str); - elog(LOG, "YARN mode resource broker registered new " - "YARN application. Start time stamp "UINT64_FORMAT, - ResBrokerStartTime); + /* check if hawq is registered successfully in Hadoop Yarn. + * if not, kill application from Hadoop Yarn. 
+ */ + LibYarnApplicationReport_t *applicationReport = NULL; + result = getApplicationReport(LIBYARNClient, YARNJobID, &applicationReport); + if (result != FUNCTION_SUCCEEDED || applicationReport == NULL) { + elog(WARNING, "YARN mode resource broker failed to get application report, " + "so kill it from Hadoop Yarn."); + result = forceKillJob(LIBYARNClient, YARNJobID); + if (result != FUNCTION_SUCCEEDED) + elog(WARNING, "YARN mode resource broker kill job failed."); + return FUNCTION_FAILED; } + if (applicationReport->progress < 0.5) { + elog(WARNING, "YARN mode resource broker failed to register itself in Hadoop Yarn." + "Got progress:%f, and try to kill application from Hadoop Yarn", applicationReport->progress); + result = forceKillJob(LIBYARNClient, YARNJobID); + if (result != FUNCTION_SUCCEEDED) + elog(WARNING, "YARN mode resource broker kill job failed."); + return FUNCTION_FAILED; + } + + ResBrokerStartTime = gettime_microsec(); + + elog(LOG, "YARN mode resource broker registered new " + "YARN application. Progress:%f, Start time stamp "UINT64_FORMAT, + applicationReport->progress, ResBrokerStartTime); + return yarnres; } --- End diff -- In this file, it is better to put each '{' on a line by itself. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to have it, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. ---