Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 14BCF200C50 for ; Fri, 3 Mar 2017 10:46:03 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 1384F160B57; Fri, 3 Mar 2017 09:46:03 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 05E79160B6D for ; Fri, 3 Mar 2017 10:46:01 +0100 (CET) Received: (qmail 26656 invoked by uid 500); 3 Mar 2017 09:46:01 -0000 Mailing-List: contact commits-help@ambari.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ambari-dev@ambari.apache.org Delivered-To: mailing list commits@ambari.apache.org Received: (qmail 26550 invoked by uid 99); 3 Mar 2017 09:46:01 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 03 Mar 2017 09:46:01 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 11CCBDFE34; Fri, 3 Mar 2017 09:46:01 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mpapirkovskyy@apache.org To: commits@ambari.apache.org Date: Fri, 03 Mar 2017 09:46:02 -0000 Message-Id: <1d462beeb3dd4a8991bae6b6a5dbd04f@git.apache.org> In-Reply-To: <57752e4224824d3cb557ff064ec87d16@git.apache.org> References: <57752e4224824d3cb557ff064ec87d16@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [2/2] ambari git commit: AMBARI-20269. Failed task during EU is not reported upfront causing Upgrade to show 'Aborted' after Finalize step. (mpapirkovskyy) archived-at: Fri, 03 Mar 2017 09:46:03 -0000 AMBARI-20269. Failed task during EU is not reported upfront causing Upgrade to show 'Aborted' after Finalize step. (mpapirkovskyy) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d567127a Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d567127a Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d567127a Branch: refs/heads/branch-2.5 Commit: d567127ae0fc1580f48d12e84fbab7a48a2f4a94 Parents: 97c2989 Author: Myroslav Papirkovskyi Authored: Thu Mar 2 19:40:55 2017 +0200 Committer: Myroslav Papirkovskyi Committed: Fri Mar 3 11:45:43 2017 +0200 ---------------------------------------------------------------------- .../server/actionmanager/ActionDBAccessor.java | 4 +- .../actionmanager/ActionDBAccessorImpl.java | 11 ++-- .../server/actionmanager/ActionScheduler.java | 30 ++--------- .../actionmanager/TestActionScheduler.java | 53 +++++++++----------- 4 files changed, 38 insertions(+), 60 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/d567127a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java index 217fe0a..9325d03 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java @@ -72,8 +72,8 @@ public interface ActionDBAccessor { /** * Mark the task as to have timed out */ - void timeoutHostRole(String host, long requestId, long stageId, - String role, boolean skipSupported); + void timeoutHostRole(String host, long requestId, long stageId, String role, + boolean skipSupported, boolean hostUnknownState); /** * Returns all the pending stages, including queued and not-queued. A stage is http://git-wip-us.apache.org/repos/asf/ambari/blob/d567127a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java index 7881a4b..04feda0 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java @@ -238,20 +238,21 @@ public class ActionDBAccessorImpl implements ActionDBAccessor { @Override public void timeoutHostRole(String host, long requestId, long stageId, String role) { - timeoutHostRole(host, requestId, stageId, role, false); + timeoutHostRole(host, requestId, stageId, role, false, false); } @Override - public void timeoutHostRole(String host, long requestId, long stageId, - String role, boolean skipSupported) { + public void timeoutHostRole(String host, long requestId, long stageId, String role, + boolean skipSupported, boolean hostUnknownState) { long now = System.currentTimeMillis(); List commands = - hostRoleCommandDAO.findByHostRole(host, requestId, stageId, role); + hostRoleCommandDAO.findByHostRole(host, requestId, stageId, role); for (HostRoleCommandEntity command : commands) { if (skipSupported) { command.setStatus(HostRoleStatus.SKIPPED_FAILED); } else { - command.setStatus(command.isRetryAllowed() ? HostRoleStatus.HOLDING_TIMEDOUT : HostRoleStatus.TIMEDOUT); + command.setStatus(command.isRetryAllowed() ? HostRoleStatus.HOLDING_TIMEDOUT : + hostUnknownState ? HostRoleStatus.ABORTED : HostRoleStatus.TIMEDOUT); } command.setEndTime(now); http://git-wip-us.apache.org/repos/asf/ambari/blob/d567127a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java index 680c0a6..7abb42b 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java @@ -786,6 +786,7 @@ class ActionScheduler implements Runnable { } // Check that service host component is not deleted + boolean isHostStateUnknown = false; if (hostDeleted) { String message = String.format( @@ -803,9 +804,10 @@ class ActionScheduler implements Runnable { processActionDeath(cluster.getClusterName(), c.getHostname(), roleStr); } status = HostRoleStatus.ABORTED; - } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, commandTimeout)) { + } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, commandTimeout) + || (isHostStateUnknown = isHostStateUnknown(s, hostObj, roleStr))) { // Process command timeouts - if (s.getAttemptCount(host, roleStr) >= maxAttempts) { + if (s.getAttemptCount(host, roleStr) >= maxAttempts || isHostStateUnknown) { LOG.warn("Host: {}, role: {}, actionId: {} expired and will be failed", host, roleStr, s.getActionId()); @@ -816,7 +818,7 @@ class ActionScheduler implements Runnable { isSkipSupported = hostRoleCommand.isFailureAutoSkipped(); } - db.timeoutHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), isSkipSupported); + db.timeoutHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), isSkipSupported, isHostStateUnknown); //Reinitialize status status = s.getHostRoleStatus(host, roleStr); @@ -845,28 +847,6 @@ class ActionScheduler implements Runnable { commandsToSchedule.add(c); LOG.trace("===> commandsToSchedule(reschedule)=" + commandsToSchedule.size()); } - } else if (isHostStateUnknown(s, hostObj, roleStr)) { - String message = "Action was aborted due agent is not heartbeating or was restarted."; - LOG.warn("Host: {}, role: {}, actionId: {} . {}", host, roleStr, - s.getActionId(), message); - - db.abortHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), message); - - if (null != cluster) { - if (!RoleCommand.CUSTOM_COMMAND.equals(c.getRoleCommand()) - && !RoleCommand.SERVICE_CHECK.equals(c.getRoleCommand()) - && !RoleCommand.ACTIONEXECUTE.equals(c.getRoleCommand())) { - //commands above don't affect host component state (e.g. no in_progress state in process), transition will fail - transitionToFailedState(cluster.getClusterName(), c.getServiceName(), roleStr, host, now, false); - } - if (c.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) { - processActionDeath(cluster.getClusterName(), c.getHostname(), roleStr); - } - } - - // Dequeue command - LOG.info("Removing command from queue, host={}, commandId={} ", host, c.getCommandId()); - actionQueue.dequeue(host, c.getCommandId()); } else if (status.equals(HostRoleStatus.PENDING)) { // in case of DEPENDENCY_ORDERED stage command can be scheduled only if all of it's dependencies are // already finished http://git-wip-us.apache.org/repos/asf/ambari/blob/d567127a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java index 347945d..b8298ed 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java @@ -330,7 +330,7 @@ public class TestActionScheduler { command.setStatus(HostRoleStatus.TIMEDOUT); return null; } - }).when(db).timeoutHostRole(anyString(), anyLong(), anyLong(), anyString(), anyBoolean()); + }).when(db).timeoutHostRole(anyString(), anyLong(), anyLong(), anyString(), anyBoolean(), eq(false)); //Small action timeout to test rescheduling @@ -409,32 +409,18 @@ public class TestActionScheduler { doAnswer(new Answer() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { - Long requestId = (Long) invocation.getArguments()[1]; - for (Stage stage : stages) { - if (requestId.equals(stage.getRequestId())) { - for (HostRoleCommand command : stage.getOrderedHostRoleCommands()) { - if (command.getStatus() == HostRoleStatus.QUEUED || - command.getStatus() == HostRoleStatus.IN_PROGRESS || - command.getStatus() == HostRoleStatus.PENDING) { - command.setStatus(HostRoleStatus.ABORTED); - } - } - } - } - + String host = (String) invocation.getArguments()[0]; + String role = (String) invocation.getArguments()[3]; + HostRoleCommand command = s.getHostRoleCommand(host, role); + command.setStatus(HostRoleStatus.ABORTED); return null; } - }).when(db).abortHostRole(anyString(), anyLong(), anyLong(), anyString(), anyString()); + }).when(db).timeoutHostRole(anyString(), anyLong(), anyLong(), anyString(), anyBoolean(), eq(true)); //Small action timeout to test rescheduling AmbariEventPublisher aep = EasyMock.createNiceMock(AmbariEventPublisher.class); - ActionScheduler scheduler = EasyMock.createMockBuilder(ActionScheduler.class). - withConstructor((long) 100, (long) 50, db, aq, fsm, 3, - new HostsMap((String) null), unitOfWork, aep, conf, entityManagerProviderMock, - mock(HostRoleCommandDAO.class), mock(HostRoleCommandFactory.class)). - addMockedMethod("cancelHostRoleCommands"). - createMock(); - EasyMock.replay(scheduler); + ActionScheduler scheduler = new ActionScheduler(100, 0, db, aq, fsm, 3, + new HostsMap((String) null), unitOfWork, null, conf, entityManagerProviderMock, null, null); scheduler.setTaskTimeoutAdjustment(false); int cycleCount=0; @@ -445,7 +431,7 @@ public class TestActionScheduler { Assert.assertEquals(HostRoleStatus.ABORTED,stages.get(0).getHostRoleStatus(hostname, "NAMENODE")); - EasyMock.verify(scheduler, entityManagerProviderMock); + EasyMock.verify(entityManagerProviderMock); } @Test @@ -492,7 +478,7 @@ public class TestActionScheduler { when(serviceObj.getCluster()).thenReturn(oneClusterMock); final List stages = new ArrayList(); - Stage stage = stageFactory.createNew(1, "/tmp", "cluster1", 1L, "stageWith2Tasks", + final Stage stage = stageFactory.createNew(1, "/tmp", "cluster1", 1L, "stageWith2Tasks", CLUSTER_HOST_INFO, "{\"command_param\":\"param_value\"}", "{\"host_param\":\"param_value\"}"); addInstallTaskToStage(stage, hostname1, "cluster1", Role.DATANODE, RoleCommand.INSTALL, Service.Type.HDFS, 1); @@ -509,10 +495,21 @@ public class TestActionScheduler { when(db.getCommandsInProgressCount()).thenReturn(stages.size()); when(db.getStagesInProgress()).thenReturn(stages); + doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + String host = (String) invocation.getArguments()[0]; + String role = (String) invocation.getArguments()[3]; + HostRoleCommand command = stage.getHostRoleCommand(host, role); + command.setStatus(HostRoleStatus.ABORTED); + return null; + } + }).when(db).timeoutHostRole(anyString(), anyLong(), anyLong(), anyString(), anyBoolean(), eq(true)); + doAnswer(new Answer>() { @Override - public Collection answer(InvocationOnMock invocation) throws Throwable { - Long requestId = (Long) invocation.getArguments()[1]; + public Collection answer(InvocationOnMock invocation) throws Throwable { + Long requestId = (Long) invocation.getArguments()[0]; List abortedCommands = Lists.newArrayList(); for (Stage stage : stages) { @@ -534,7 +531,7 @@ public class TestActionScheduler { return abortedCommands; } - }).when(db).abortHostRole(anyString(), anyLong(), anyLong(), anyString(), anyString()); + }).when(db).abortOperation(anyLong()); ArgumentCaptor eventsCapture1 = ArgumentCaptor.forClass(ServiceComponentHostEvent.class); @@ -940,7 +937,7 @@ public class TestActionScheduler { boolean taskShouldBeSkipped = stageSupportsAutoSkip && autoSkipFailedTask; db.timeoutHostRole(EasyMock.anyString(), EasyMock.anyLong(), EasyMock.anyLong(), - EasyMock.anyString(), EasyMock.eq(taskShouldBeSkipped)); + EasyMock.anyString(), EasyMock.eq(taskShouldBeSkipped), EasyMock.anyBoolean()); EasyMock.expectLastCall();