Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id C3648200CC2 for ; Wed, 5 Jul 2017 17:18:40 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id C19C91639C7; Wed, 5 Jul 2017 15:18:40 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id B97721639C5 for ; Wed, 5 Jul 2017 17:18:39 +0200 (CEST) Received: (qmail 98705 invoked by uid 500); 5 Jul 2017 15:18:39 -0000 Mailing-List: contact commits-help@ariatosca.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ariatosca.incubator.apache.org Delivered-To: mailing list commits@ariatosca.incubator.apache.org Received: (qmail 98696 invoked by uid 99); 5 Jul 2017 15:18:38 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd3-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Jul 2017 15:18:38 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd3-us-west.apache.org (ASF Mail Server at spamd3-us-west.apache.org) with ESMTP id 856ED1916AD for ; Wed, 5 Jul 2017 15:18:38 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd3-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -4.22 X-Spam-Level: X-Spam-Status: No, score=-4.22 tagged_above=-999 required=6.31 tests=[HK_RANDOM_FROM=0.001, KAM_ASCII_DIVIDERS=0.8, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.001, SPF_PASS=-0.001, URIBL_BLOCKED=0.001] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd3-us-west.apache.org [10.40.0.10]) (amavisd-new, port 10024) with ESMTP id owNqVamsiuUh for ; Wed, 5 Jul 2017 15:18:36 +0000 (UTC) Received: from mail.apache.org 
(hermes.apache.org [140.211.11.3]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with SMTP id 5B90F5FB7A for ; Wed, 5 Jul 2017 15:18:35 +0000 (UTC) Received: (qmail 98681 invoked by uid 99); 5 Jul 2017 15:18:34 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Jul 2017 15:18:34 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 82D2ADFC25; Wed, 5 Jul 2017 15:18:34 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mxmrlv@apache.org To: commits@ariatosca.incubator.apache.org Message-Id: <3d6d0c9ca322413999895301b3b649a9@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: incubator-ariatosca git commit: ARIA-299 Resuming canceled execution with frozen task fails [Forced Update!] Date: Wed, 5 Jul 2017 15:18:34 +0000 (UTC) archived-at: Wed, 05 Jul 2017 15:18:41 -0000 Repository: incubator-ariatosca Updated Branches: refs/heads/ARIA-299-Resuming-canceled-execution-with-frozen-task-fails dd1189463 -> 975a3a3c2 (forced update) ARIA-299 Resuming canceled execution with frozen task fails Project: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/commit/975a3a3c Tree: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/tree/975a3a3c Diff: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/diff/975a3a3c Branch: refs/heads/ARIA-299-Resuming-canceled-execution-with-frozen-task-fails Commit: 975a3a3c2343537ac922b62d1560fd90f0c20d69 Parents: 6c08424 Author: max-orlov Authored: Wed Jul 5 16:16:39 2017 +0300 Committer: max-orlov Committed: Wed Jul 5 18:18:19 2017 +0300 ---------------------------------------------------------------------- .../workflows/core/events_handler.py | 4 + aria/orchestrator/workflows/executor/base.py | 2 +- 
aria/orchestrator/workflows/executor/thread.py | 2 +- tests/orchestrator/test_workflow_runner.py | 144 ++++++++++++++++--- 4 files changed, 129 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/975a3a3c/aria/orchestrator/workflows/core/events_handler.py ---------------------------------------------------------------------- diff --git a/aria/orchestrator/workflows/core/events_handler.py b/aria/orchestrator/workflows/core/events_handler.py index 769c1a8..eb6f271 100644 --- a/aria/orchestrator/workflows/core/events_handler.py +++ b/aria/orchestrator/workflows/core/events_handler.py @@ -114,6 +114,10 @@ def _workflow_cancelled(workflow_context, *args, **kwargs): elif execution.status in (execution.SUCCEEDED, execution.FAILED): _log_tried_to_cancel_execution_but_it_already_ended(workflow_context, execution.status) else: + # Any non ended task would be put back to pending state + for task in execution.tasks: + if not task.has_ended(): + task.status = task.PENDING execution.status = execution.CANCELLED execution.ended_at = datetime.utcnow() http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/975a3a3c/aria/orchestrator/workflows/executor/base.py ---------------------------------------------------------------------- diff --git a/aria/orchestrator/workflows/executor/base.py b/aria/orchestrator/workflows/executor/base.py index ec1a0c7..e7d03ea 100644 --- a/aria/orchestrator/workflows/executor/base.py +++ b/aria/orchestrator/workflows/executor/base.py @@ -49,7 +49,7 @@ class BaseExecutor(logger.LoggerMixin): """ pass - def terminate(self, ctx): + def terminate(self, task_id): """ Terminate the executing task :return: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/975a3a3c/aria/orchestrator/workflows/executor/thread.py ---------------------------------------------------------------------- diff --git 
a/aria/orchestrator/workflows/executor/thread.py b/aria/orchestrator/workflows/executor/thread.py index d9dcdf8..6feef0c 100644 --- a/aria/orchestrator/workflows/executor/thread.py +++ b/aria/orchestrator/workflows/executor/thread.py @@ -54,7 +54,7 @@ class ThreadExecutor(BaseExecutor): def close(self): self._stopped = True for thread in self._pool: - thread.join() + thread.join(5) def _processor(self): while not self._stopped: http://git-wip-us.apache.org/repos/asf/incubator-ariatosca/blob/975a3a3c/tests/orchestrator/test_workflow_runner.py ---------------------------------------------------------------------- diff --git a/tests/orchestrator/test_workflow_runner.py b/tests/orchestrator/test_workflow_runner.py index e640c7d..6951a7f 100644 --- a/tests/orchestrator/test_workflow_runner.py +++ b/tests/orchestrator/test_workflow_runner.py @@ -23,7 +23,7 @@ import pytest from aria.modeling import exceptions as modeling_exceptions from aria.modeling import models from aria.orchestrator import exceptions -from aria.orchestrator.events import on_cancelled_workflow_signal +from aria.orchestrator import events from aria.orchestrator.workflow_runner import WorkflowRunner from aria.orchestrator.workflows.executor.process import ProcessExecutor from aria.orchestrator.workflows import api @@ -46,9 +46,10 @@ from ..fixtures import ( # pylint: disable=unused-import resource_storage as resource ) -events = { +custom_events = { 'is_resumed': Event(), 'is_active': Event(), + 'execution_cancelled': Event(), 'execution_ended': Event() } @@ -318,43 +319,54 @@ def _create_workflow_runner(request, workflow_name, inputs=None, executor=None, class TestResumableWorkflows(object): - def test_resume_workflow(self, workflow_context, executor): - node = workflow_context.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME) - node.attributes['invocations'] = models.Attribute.wrap('invocations', 0) - self._create_interface(workflow_context, node, mock_resuming_task) + def 
_create_initial_workflow_runner( + self, workflow_context, workflow, executor, inputs=None): service = workflow_context.service service.workflows['custom_workflow'] = tests_mock.models.create_operation( 'custom_workflow', - operation_kwargs={'function': '{0}.{1}'.format(__name__, mock_workflow.__name__)} + operation_kwargs={ + 'function': '{0}.{1}'.format(__name__, workflow.__name__), + 'inputs': dict((k, models.Input.wrap(k, v)) for k, v in (inputs or {}).items()) + } ) workflow_context.model.service.update(service) wf_runner = WorkflowRunner( service_id=workflow_context.service.id, - inputs={}, + inputs=inputs or {}, model_storage=workflow_context.model, resource_storage=workflow_context.resource, plugin_manager=None, workflow_name='custom_workflow', executor=executor) + return wf_runner + + def test_resume_workflow(self, workflow_context, thread_executor): + node = workflow_context.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME) + node.attributes['invocations'] = models.Attribute.wrap('invocations', 0) + self._create_interface(workflow_context, node, mock_resuming_task) + + wf_runner = self._create_initial_workflow_runner( + workflow_context, mock_parallel_workflow, thread_executor) + wf_thread = Thread(target=wf_runner.execute) wf_thread.daemon = True wf_thread.start() # Wait for the execution to start - if events['is_active'].wait(5) is False: + if custom_events['is_active'].wait(5) is False: raise TimeoutError("is_active wasn't set to True") wf_runner.cancel() - if events['execution_ended'].wait(60) is False: + if custom_events['execution_cancelled'].wait(60) is False: raise TimeoutError("Execution did not end") tasks = workflow_context.model.task.list(filters={'_stub_type': None}) assert any(task.status == task.SUCCESS for task in tasks) - assert any(task.status in (task.FAILED, task.RETRYING) for task in tasks) - events['is_resumed'].set() - assert any(task.status in (task.FAILED, task.RETRYING) for task in tasks) + assert any(task.status == 
task.PENDING for task in tasks) + custom_events['is_resumed'].set() + assert any(task.status == task.PENDING for task in tasks) # Create a new workflow runner, with an existing execution id. This would cause # the old execution to restart. @@ -365,7 +377,7 @@ class TestResumableWorkflows(object): resource_storage=workflow_context.resource, plugin_manager=None, execution_id=wf_runner.execution.id, - executor=executor) + executor=thread_executor) new_wf_runner.execute() @@ -374,9 +386,58 @@ class TestResumableWorkflows(object): assert node.attributes['invocations'].value == 3 assert wf_runner.execution.status == wf_runner.execution.SUCCEEDED + def test_resume_failed_task(self, workflow_context, thread_executor): + + node = workflow_context.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME) + node.attributes['invocations'] = models.Attribute.wrap('invocations', 0) + self._create_interface(workflow_context, node, mock_failed_first_task) + + wf_runner = self._create_initial_workflow_runner( + workflow_context, mock_sequential_workflow, thread_executor) + wf_thread = Thread(target=wf_runner.execute) + wf_thread.setDaemon(True) + wf_thread.start() + + if custom_events['is_active'].wait(60) is False: + raise TimeoutError("Execution did not end") + wf_runner.cancel() + if custom_events['execution_cancelled'].wait(60) is False: + raise TimeoutError("Execution did not end") + + task = workflow_context.model.task.list(filters={'_stub_type': None})[0] + assert node.attributes['invocations'].value == 2 + assert task.status == task.PENDING + assert wf_runner.execution.status in (wf_runner.execution.CANCELLED, + wf_runner.execution.CANCELLING) + + custom_events['is_resumed'].set() + assert node.attributes['invocations'].value == 2 + + # Create a new workflow runner, with an existing execution id. This would cause + # the old execution to restart. 
+ new_thread_executor = thread.ThreadExecutor() + try: + new_wf_runner = WorkflowRunner( + service_id=wf_runner.service.id, + inputs={}, + model_storage=workflow_context.model, + resource_storage=workflow_context.resource, + plugin_manager=None, + execution_id=wf_runner.execution.id, + executor=new_thread_executor) + + new_wf_runner.execute() + finally: + new_thread_executor.close() + + # Wait for it to finish and assert changes. + assert node.attributes['invocations'].value == task.max_attempts - 1 + assert task.status == task.SUCCESS + assert wf_runner.execution.status == wf_runner.execution.SUCCEEDED + @staticmethod @pytest.fixture - def executor(): + def thread_executor(): result = thread.ThreadExecutor() try: yield result @@ -417,16 +478,23 @@ class TestResumableWorkflows(object): @pytest.fixture(autouse=True) def register_to_events(self): + def execution_cancelled(*args, **kwargs): + custom_events['execution_cancelled'].set() + def execution_ended(*args, **kwargs): - events['execution_ended'].set() + custom_events['execution_ended'].set() - on_cancelled_workflow_signal.connect(execution_ended) + events.on_cancelled_workflow_signal.connect(execution_cancelled) + events.on_failure_workflow_signal.connect(execution_ended) yield - on_cancelled_workflow_signal.disconnect(execution_ended) + events.on_cancelled_workflow_signal.disconnect(execution_cancelled) + events.on_failure_workflow_signal.disconnect(execution_ended) + for event in custom_events.values(): + event.clear() @workflow -def mock_workflow(ctx, graph): +def mock_parallel_workflow(ctx, graph): node = ctx.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME) graph.add_tasks( api.task.OperationTask( @@ -441,8 +509,42 @@ def mock_resuming_task(ctx): ctx.node.attributes['invocations'] += 1 if ctx.node.attributes['invocations'] != 1: - events['is_active'].set() - if not events['is_resumed'].isSet(): + custom_events['is_active'].set() + if not custom_events['is_resumed'].isSet(): # if resume was 
called, increase by one. otherwise fail the execution - second task should # fail as long as it was not a part of resuming the workflow raise BaseException("wasn't resumed yet") + + +@workflow +def mock_sequential_workflow(ctx, graph): + node = ctx.model.node.get_by_name(tests_mock.models.DEPENDENCY_NODE_NAME) + graph.sequence( + api.task.OperationTask(node, + interface_name='aria.interfaces.lifecycle', + operation_name='create', + retry_interval=1, + max_attempts=10), + ) + + +@operation +def mock_failed_first_task(ctx): + """ + The task should run at most ctx.task.max_attempts - 1 times, and only then pass. + Overall, the number of invocations should be ctx.task.max_attempts - 1 + """ + ctx.node.attributes['invocations'] += 1 + + if ctx.node.attributes['invocations'] == 2: + custom_events['is_active'].set() + # block the thread + while True: + pass + + elif ctx.node.attributes['invocations'] == ctx.task.max_attempts - 1: + # pass only just before the end. + return + else: + # fail otherwise + raise BaseException("stop this task")