airflow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [airflow] dazza-codes commented on a change in pull request #6811: [RFC][AIRFLOW-6245] Add custom waiters for AWS batch jobs
Date Sat, 28 Dec 2019 16:35:09 GMT
dazza-codes commented on a change in pull request #6811: [RFC][AIRFLOW-6245] Add custom waiters
for AWS batch jobs
URL: https://github.com/apache/airflow/pull/6811#discussion_r361802565
 
 

 ##########
 File path: airflow/providers/amazon/aws/hooks/batch_client.py
 ##########
 @@ -0,0 +1,551 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+A client for AWS batch services
+
+.. seealso::
+
+    - http://boto3.readthedocs.io/en/latest/guide/configuration.html
+    - http://boto3.readthedocs.io/en/latest/reference/services/batch.html
+    - https://docs.aws.amazon.com/batch/latest/APIReference/Welcome.html
+"""
+
+from random import uniform
+from time import sleep
+from typing import Dict, List, Optional, Union
+
+import botocore.client
+import botocore.exceptions
+import botocore.waiter
+from typing_extensions import Protocol, runtime_checkable
+
+from airflow import AirflowException, LoggingMixin
+from airflow.contrib.hooks.aws_hook import AwsHook
+
+# pylint: disable=invalid-name, unused-argument
+
+
+@runtime_checkable
+class AwsBatchProtocol(Protocol):
+    """
+    A structured Protocol for ``boto3.client('batch') -> botocore.client.Batch``.
+    This is used for type hints on :py:meth:`.AwsBatchClient.client`; it covers
+    only the subset of client methods required.
+
+    .. seealso::
+
+        - https://mypy.readthedocs.io/en/latest/protocols.html
+        - http://boto3.readthedocs.io/en/latest/reference/services/batch.html
+    """
+
+    def describe_jobs(self, jobs: List[str]) -> Dict:
+        """
+        Get job descriptions from AWS batch
+
+        :param jobs: a list of JobId to describe
+        :type jobs: List[str]
+
+        :return: an API response to describe jobs
+        :rtype: Dict
+        """
+        ...
+
+    def get_waiter(self, waiterName: str) -> botocore.waiter.Waiter:
+        """
+        Get an AWS Batch service waiter
+
+        :param waiterName: The name of the waiter.  The name should match
+            the name (including the casing) of the key name in the waiter
+            model file (typically this is CamelCasing).
+        :type waiterName: str
+
+        :return: a waiter object for the named AWS batch service
+        :rtype: botocore.waiter.Waiter
+
+        .. note::
+            AWS batch might not have any waiters (until botocore PR-1307 is released).
+
+            .. code-block:: python
+
+                import boto3
+                boto3.client('batch').waiter_names == []
+
+        .. seealso::
+
+            - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/clients.html#waiters
+            - https://github.com/boto/botocore/pull/1307
+        """
+        ...
+
+    def submit_job(
+        self,
+        jobName: str,
+        jobQueue: str,
+        jobDefinition: str,
+        arrayProperties: Dict,
+        parameters: Dict,
+        containerOverrides: Dict,
+    ) -> Dict:
+        """
+        Submit a batch job
+
+        :param jobName: the name for the AWS batch job
+        :type jobName: str
+
+        :param jobQueue: the queue name on AWS Batch
+        :type jobQueue: str
+
+        :param jobDefinition: the job definition name on AWS Batch
+        :type jobDefinition: str
+
+        :param arrayProperties: the same parameter that boto3 will receive
+        :type arrayProperties: Dict
+
+        :param parameters: the same parameter that boto3 will receive
+        :type parameters: Dict
+
+        :param containerOverrides: the same parameter that boto3 will receive
+        :type containerOverrides: Dict
+
+        :return: an API response
+        :rtype: Dict
+        """
+        ...
+
+    def terminate_job(self, jobId: str, reason: str) -> Dict:
+        """
+        Terminate a batch job
+
+        :param jobId: a job ID to terminate
+        :type jobId: str
+
+        :param reason: a reason to terminate job ID
+        :type reason: str
+
+        :return: an API response
+        :rtype: Dict
+        """
+        ...
+
+
+class AwsBatchClient(LoggingMixin):
+    """
+    A client for AWS batch services.
+
+    :param max_retries: exponential back-off retries, 4200 = 48 hours;
+        polling is only used when waiters is None
+    :type max_retries: Optional[int]
+
+    :param status_retries: number of HTTP retries to get job status, 10;
+        polling is only used when waiters is None
+    :type status_retries: Optional[int]
+
+    :param aws_conn_id: connection id of AWS credentials / region name. If None,
+        the default boto3 credential strategy will be used
+        (http://boto3.readthedocs.io/en/latest/guide/configuration.html).
+    :type aws_conn_id: Optional[str]
+
+    :param region_name: region name to use in AWS client.
+        Override the region_name in connection (if provided)
+    :type region_name: Optional[str]
+
+    .. note::
+        Several methods use a default random delay to check or poll for job status, i.e.
+        ``random.uniform(DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX)``
+        Using a random interval helps to avoid AWS API throttle limits
+        when many concurrent tasks request job-descriptions.
+
+        To change the global range of jitter that is applied when a random
+        delay is used to check batch job status, override these class attributes, e.g.:
+
+        .. code-block:: python
+
+            AwsBatchClient.DEFAULT_DELAY_MIN = 0
+            AwsBatchClient.DEFAULT_DELAY_MAX = 5
+
+        When explicit delay values are used, a 1 second random jitter is applied to the
+        delay (e.g. a delay of 0 sec will be a ``random.uniform(0, 1)`` delay).  It is
+        generally recommended that random jitter is added to API requests.  A
+        convenience method is provided for this, e.g. to get a random delay of
+        10 sec +/- 5 sec: ``delay = AwsBatchClient.add_jitter(10, width=5, minima=0)``
+
+    .. seealso::
+        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/batch.html
+        - https://docs.aws.amazon.com/general/latest/gr/api-retries.html
+        - https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
+    """
+
+    MAX_RETRIES = 4200
+    STATUS_RETRIES = 10
+
+    # delays are in seconds
+    DEFAULT_DELAY_MIN = 1
+    DEFAULT_DELAY_MAX = 10
+
+    def __init__(
+        self,
+        max_retries: Optional[int] = None,
+        status_retries: Optional[int] = None,
+        aws_conn_id: Optional[str] = None,
+        region_name: Optional[str] = None,
+    ):
+        super().__init__()
+        self.max_retries = max_retries or self.MAX_RETRIES
+        self.status_retries = status_retries or self.STATUS_RETRIES
+        self.aws_conn_id = aws_conn_id
+        self.region_name = region_name
+        self._hook = None  # type: Union[AwsHook, None]
+        self._client = None  # type: Union[AwsBatchProtocol, botocore.client.BaseClient, None]
+
+    @property
+    def hook(self) -> AwsHook:
+        """
+        An AWS API connection manager (wraps boto3)
+
+        :rtype: AwsHook
 
 Review comment:
   OK, thanks.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message