airflow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (AIRFLOW-3018) Fix Documentation
Date Thu, 06 Sep 2018 02:02:00 GMT

    [ https://issues.apache.org/jira/browse/AIRFLOW-3018?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16605166#comment-16605166 ]

ASF GitHub Bot commented on AIRFLOW-3018:
-----------------------------------------

r39132 closed pull request #3852: [AIRFLOW-3018] Fix Minor issues in Documentation
URL: https://github.com/apache/incubator-airflow/pull/3852
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/airflow/contrib/hooks/azure_data_lake_hook.py b/airflow/contrib/hooks/azure_data_lake_hook.py
index 1a02d78f27..74ae194809 100644
--- a/airflow/contrib/hooks/azure_data_lake_hook.py
+++ b/airflow/contrib/hooks/azure_data_lake_hook.py
@@ -59,7 +59,7 @@ def check_for_file(self, file_path):
         :param file_path: Path and name of the file.
         :type file_path: str
         :return: True if the file exists, False otherwise.
-        :rtype bool
+        :rtype: bool
         """
         try:
             files = self.connection.glob(file_path, details=False, invalidate_cache=True)
diff --git a/airflow/contrib/hooks/azure_fileshare_hook.py b/airflow/contrib/hooks/azure_fileshare_hook.py
index d4066ee549..8afa1540d7 100644
--- a/airflow/contrib/hooks/azure_fileshare_hook.py
+++ b/airflow/contrib/hooks/azure_fileshare_hook.py
@@ -56,7 +56,7 @@ def check_for_directory(self, share_name, directory_name, **kwargs):
             `FileService.exists()` takes.
         :type kwargs: object
         :return: True if the file exists, False otherwise.
-        :rtype bool
+        :rtype: bool
         """
         return self.connection.exists(share_name, directory_name,
                                       **kwargs)
@@ -75,7 +75,7 @@ def check_for_file(self, share_name, directory_name, file_name, **kwargs):
             `FileService.exists()` takes.
         :type kwargs: object
         :return: True if the file exists, False otherwise.
-        :rtype bool
+        :rtype: bool
         """
         return self.connection.exists(share_name, directory_name,
                                       file_name, **kwargs)
@@ -92,7 +92,7 @@ def list_directories_and_files(self, share_name, directory_name=None, **kwargs):
             `FileService.list_directories_and_files()` takes.
         :type kwargs: object
         :return: A list of files and directories
-        :rtype list
+        :rtype: list
         """
         return self.connection.list_directories_and_files(share_name,
                                                           directory_name,
@@ -110,7 +110,7 @@ def create_directory(self, share_name, directory_name, **kwargs):
             `FileService.create_directory()` takes.
         :type kwargs: object
         :return: A list of files and directories
-        :rtype list
+        :rtype: list
         """
         return self.connection.create_directory(share_name, directory_name, **kwargs)
 
diff --git a/airflow/contrib/hooks/gcp_container_hook.py b/airflow/contrib/hooks/gcp_container_hook.py
index 0047b8dbeb..3934f07a95 100644
--- a/airflow/contrib/hooks/gcp_container_hook.py
+++ b/airflow/contrib/hooks/gcp_container_hook.py
@@ -101,6 +101,7 @@ def _append_label(cluster_proto, key, val):
 
         Labels must fit the regex [a-z]([-a-z0-9]*[a-z0-9])? (current airflow version
         string follows semantic versioning spec: x.y.z).
+
         :param cluster_proto: The proto to append resource_label airflow version to
         :type cluster_proto: google.cloud.container_v1.types.Cluster
         :param key: The key label
diff --git a/airflow/contrib/hooks/gcs_hook.py b/airflow/contrib/hooks/gcs_hook.py
index 3d42ec4426..e3c3747e0b 100644
--- a/airflow/contrib/hooks/gcs_hook.py
+++ b/airflow/contrib/hooks/gcs_hook.py
@@ -65,6 +65,7 @@ def copy(self, source_bucket, source_object, destination_bucket=None,
         :type destination_bucket: string
         :param destination_object: The (renamed) path of the object if given.
             Can be omitted; then the same name is used.
+        :type destination_object: string
         """
         destination_bucket = destination_bucket or source_bucket
         destination_object = destination_object or source_object
diff --git a/airflow/contrib/hooks/wasb_hook.py b/airflow/contrib/hooks/wasb_hook.py
index 1d73abd78b..130c19469b 100644
--- a/airflow/contrib/hooks/wasb_hook.py
+++ b/airflow/contrib/hooks/wasb_hook.py
@@ -58,7 +58,7 @@ def check_for_blob(self, container_name, blob_name, **kwargs):
             `BlockBlobService.exists()` takes.
         :type kwargs: object
         :return: True if the blob exists, False otherwise.
-        :rtype bool
+        :rtype: bool
         """
         return self.connection.exists(container_name, blob_name, **kwargs)
 
@@ -74,7 +74,7 @@ def check_for_prefix(self, container_name, prefix, **kwargs):
             `BlockBlobService.list_blobs()` takes.
         :type kwargs: object
         :return: True if blobs matching the prefix exist, False otherwise.
-        :rtype bool
+        :rtype: bool
         """
         matches = self.connection.list_blobs(container_name, prefix,
                                              num_results=1, **kwargs)
diff --git a/airflow/contrib/operators/awsbatch_operator.py b/airflow/contrib/operators/awsbatch_operator.py
index 4008c90c47..3c778e6e68 100644
--- a/airflow/contrib/operators/awsbatch_operator.py
+++ b/airflow/contrib/operators/awsbatch_operator.py
@@ -43,11 +43,11 @@ class AWSBatchOperator(BaseOperator):
     :param job_queue: the queue name on AWS Batch
     :type job_queue: str
     :param overrides: the same parameter that boto3 will receive on
-        containerOverrides (templated).
+        containerOverrides (templated):
         http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job
     :type overrides: dict
-    :param max_retries: exponential backoff retries while waiter is not merged,
-        4200 = 48 hours
+    :param max_retries: exponential backoff retries while waiter is not
+        merged, 4200 = 48 hours
     :type max_retries: int
     :param aws_conn_id: connection id of AWS credentials / region name. If None,
         credential boto3 strategy will be used
diff --git a/airflow/contrib/operators/bigquery_check_operator.py b/airflow/contrib/operators/bigquery_check_operator.py
index 3eba0771db..ff7b97eea8 100644
--- a/airflow/contrib/operators/bigquery_check_operator.py
+++ b/airflow/contrib/operators/bigquery_check_operator.py
@@ -113,7 +113,7 @@ class BigQueryIntervalCheckOperator(IntervalCheckOperator):
     This method constructs a query like so ::
 
         SELECT {metrics_threshold_dict_key} FROM {table}
-            WHERE {date_filter_column}=<date>
+        WHERE {date_filter_column}=<date>
 
     :param table: the table name
     :type table: str
diff --git a/airflow/contrib/operators/bigquery_get_data.py b/airflow/contrib/operators/bigquery_get_data.py
index ab8f71b717..6e0c6c44de 100644
--- a/airflow/contrib/operators/bigquery_get_data.py
+++ b/airflow/contrib/operators/bigquery_get_data.py
@@ -51,7 +51,7 @@ class BigQueryGetDataOperator(BaseOperator):
         )
 
     :param dataset_id: The dataset ID of the requested table. (templated)
-    :type destination_dataset_table: string
+    :type dataset_id: string
     :param table_id: The table ID of the requested table. (templated)
     :type table_id: string
     :param max_results: The maximum number of records (rows) to be fetched
diff --git a/airflow/contrib/operators/bigquery_operator.py b/airflow/contrib/operators/bigquery_operator.py
index b0c0ce2d6e..e9366bf2ef 100644
--- a/airflow/contrib/operators/bigquery_operator.py
+++ b/airflow/contrib/operators/bigquery_operator.py
@@ -366,7 +366,7 @@ class BigQueryCreateExternalTableOperator(BaseOperator):
     :param source_objects: List of Google cloud storage URIs to point
         table to. (templated)
         If source_format is 'DATASTORE_BACKUP', the list must only contain a single URI.
-    :type object: list
+    :type source_objects: list
     :param destination_project_dataset_table: The dotted (<project>.)<dataset>.<table>
         BigQuery table to load data into (templated). If <project> is not included,
         project will be the project defined in the connection json.
@@ -383,7 +383,7 @@ class BigQueryCreateExternalTableOperator(BaseOperator):
     :type schema_fields: list
     :param schema_object: If set, a GCS object path pointing to a .json file that
         contains the schema for the table. (templated)
-    :param schema_object: string
+    :type schema_object: string
     :param source_format: File format of the data.
     :type source_format: string
     :param compression: [Optional] The compression type of the data source.
diff --git a/airflow/contrib/operators/bigquery_to_gcs.py b/airflow/contrib/operators/bigquery_to_gcs.py
index 278b208047..7cefd7e815 100644
--- a/airflow/contrib/operators/bigquery_to_gcs.py
+++ b/airflow/contrib/operators/bigquery_to_gcs.py
@@ -31,8 +31,8 @@ class BigQueryToCloudStorageOperator(BaseOperator):
         https://cloud.google.com/bigquery/docs/reference/v2/jobs
 
     :param source_project_dataset_table: The dotted
-        (<project>.|<project>:)<dataset>.<table> BigQuery table to use as the source
-        data. If <project> is not included, project will be the project
+        ``(<project>.|<project>:)<dataset>.<table>`` BigQuery table to use as the
+        source data. If <project> is not included, project will be the project
         defined in the connection json. (templated)
     :type source_project_dataset_table: string
     :param destination_cloud_storage_uris: The destination Google Cloud
@@ -43,7 +43,7 @@ class BigQueryToCloudStorageOperator(BaseOperator):
     :param compression: Type of compression to use.
     :type compression: string
     :param export_format: File format to export.
-    :type field_delimiter: string
+    :type export_format: string
     :param field_delimiter: The delimiter to use when extracting to a CSV.
     :type field_delimiter: string
     :param print_header: Whether to print a header for a CSV file extract.
diff --git a/airflow/contrib/operators/databricks_operator.py b/airflow/contrib/operators/databricks_operator.py
index 3245a99256..53ee30ef13 100644
--- a/airflow/contrib/operators/databricks_operator.py
+++ b/airflow/contrib/operators/databricks_operator.py
@@ -32,7 +32,7 @@
 
 class DatabricksSubmitRunOperator(BaseOperator):
     """
-    Submits an Spark job run to Databricks using the
+    Submits a Spark job run to Databricks using the
     `api/2.0/jobs/runs/submit
     <https://docs.databricks.com/api/latest/jobs.html#runs-submit>`_
     API endpoint.
diff --git a/airflow/contrib/operators/dataflow_operator.py b/airflow/contrib/operators/dataflow_operator.py
index 7cb950ab43..6a44006ec8 100644
--- a/airflow/contrib/operators/dataflow_operator.py
+++ b/airflow/contrib/operators/dataflow_operator.py
@@ -16,7 +16,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import os
+
 import re
 import uuid
 import copy
@@ -33,6 +33,39 @@ class DataFlowJavaOperator(BaseOperator):
     Start a Java Cloud DataFlow batch job. The parameters of the operation
     will be passed to the job.
 
+    .. seealso::
+        For more detail on job submission have a look at the reference:
+        https://cloud.google.com/dataflow/pipelines/specifying-exec-params
+
+    :param jar: The reference to a self executing DataFlow jar.
+    :type jar: string
+    :param dataflow_default_options: Map of default job options.
+    :type dataflow_default_options: dict
+    :param options: Map of job specific options.
+    :type options: dict
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud
+    Platform.
+    :type gcp_conn_id: string
+    :param delegate_to: The account to impersonate, if any.
+        For this to work, the service account making the request must have
+        domain-wide delegation enabled.
+    :type delegate_to: string
+    :param poll_sleep: The time in seconds to sleep between polling Google
+        Cloud Platform for the dataflow job status while the job is in the
+        JOB_STATE_RUNNING state.
+    :type poll_sleep: int
+    :param job_class: The name of the dataflow job class to be executued, it
+    is often not the main class configured in the dataflow jar file.
+    :type job_class: string
+
+    Both ``jar`` and ``options`` are templated so you can use variables in them.
+
+    Note that both
+    ``dataflow_default_options`` and ``options`` will be merged to specify pipeline
+    execution parameter, and ``dataflow_default_options`` is expected to save
+    high-level options, for instances, project and zone information, which
+    apply to all dataflow operators in the DAG.
+
     It's a good practice to define dataflow_* parameters in the default_args of the dag
     like the project, zone and staging location.
 
@@ -66,7 +99,6 @@ class DataFlowJavaOperator(BaseOperator):
            gcp_conn_id='gcp-airflow-service-account',
            dag=my-dag)
 
-    Both ``jar`` and ``options`` are templated so you can use variables in them.
     """
     template_fields = ['options', 'jar']
     ui_color = '#0273d4'
@@ -83,39 +115,6 @@ def __init__(
             job_class=None,
             *args,
             **kwargs):
-        """
-        Create a new DataFlowJavaOperator. Note that both
-        dataflow_default_options and options will be merged to specify pipeline
-        execution parameter, and dataflow_default_options is expected to save
-        high-level options, for instances, project and zone information, which
-        apply to all dataflow operators in the DAG.
-
-
-        .. seealso::
-            For more detail on job submission have a look at the reference:
-            https://cloud.google.com/dataflow/pipelines/specifying-exec-params
-
-        :param jar: The reference to a self executing DataFlow jar.
-        :type jar: string
-        :param dataflow_default_options: Map of default job options.
-        :type dataflow_default_options: dict
-        :param options: Map of job specific options.
-        :type options: dict
-        :param gcp_conn_id: The connection ID to use connecting to Google Cloud
-        Platform.
-        :type gcp_conn_id: string
-        :param delegate_to: The account to impersonate, if any.
-            For this to work, the service account making the request must have
-            domain-wide delegation enabled.
-        :type delegate_to: string
-        :param poll_sleep: The time in seconds to sleep between polling Google
-            Cloud Platform for the dataflow job status while the job is in the
-            JOB_STATE_RUNNING state.
-        :type poll_sleep: int
-        :param job_class: The name of the dataflow job class to be executued, it
-        is often not the main class configured in the dataflow jar file.
-        :type job_class: string
-        """
         super(DataFlowJavaOperator, self).__init__(*args, **kwargs)
 
         dataflow_default_options = dataflow_default_options or {}
@@ -149,6 +148,25 @@ class DataflowTemplateOperator(BaseOperator):
     """
     Start a Templated Cloud DataFlow batch job. The parameters of the operation
     will be passed to the job.
+
+    :param template: The reference to the DataFlow template.
+    :type template: string
+    :param dataflow_default_options: Map of default job environment options.
+    :type dataflow_default_options: dict
+    :param parameters: Map of job specific parameters for the template.
+    :type parameters: dict
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud
+    Platform.
+    :type gcp_conn_id: string
+    :param delegate_to: The account to impersonate, if any.
+        For this to work, the service account making the request must have
+        domain-wide delegation enabled.
+    :type delegate_to: string
+    :param poll_sleep: The time in seconds to sleep between polling Google
+        Cloud Platform for the dataflow job status while the job is in the
+        JOB_STATE_RUNNING state.
+    :type poll_sleep: int
+
     It's a good practice to define dataflow_* parameters in the default_args of the dag
     like the project, zone and staging location.
 
@@ -185,6 +203,16 @@ class DataflowTemplateOperator(BaseOperator):
 
     ``template``, ``dataflow_default_options`` and ``parameters`` are templated so you can
     use variables in them.
+
+    Note that ``dataflow_default_options`` is expected to save high-level options
+    for project information, which apply to all dataflow operators in the DAG.
+
+        .. seealso::
+            https://cloud.google.com/dataflow/docs/reference/rest/v1b3
+            /LaunchTemplateParameters
+            https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
+            For more detail on job template execution have a look at the reference:
+            https://cloud.google.com/dataflow/docs/templates/executing-templates
     """
     template_fields = ['parameters', 'dataflow_default_options', 'template']
     ui_color = '#0273d4'
@@ -200,36 +228,6 @@ def __init__(
             poll_sleep=10,
             *args,
             **kwargs):
-        """
-        Create a new DataflowTemplateOperator. Note that
-        dataflow_default_options is expected to save high-level options
-        for project information, which apply to all dataflow operators in the DAG.
-
-        .. seealso::
-            https://cloud.google.com/dataflow/docs/reference/rest/v1b3
-            /LaunchTemplateParameters
-            https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
-            For more detail on job template execution have a look at the reference:
-            https://cloud.google.com/dataflow/docs/templates/executing-templates
-
-        :param template: The reference to the DataFlow template.
-        :type template: string
-        :param dataflow_default_options: Map of default job environment options.
-        :type dataflow_default_options: dict
-        :param parameters: Map of job specific parameters for the template.
-        :type parameters: dict
-        :param gcp_conn_id: The connection ID to use connecting to Google Cloud
-        Platform.
-        :type gcp_conn_id: string
-        :param delegate_to: The account to impersonate, if any.
-            For this to work, the service account making the request must have
-            domain-wide delegation enabled.
-        :type delegate_to: string
-        :param poll_sleep: The time in seconds to sleep between polling Google
-            Cloud Platform for the dataflow job status while the job is in the
-            JOB_STATE_RUNNING state.
-        :type poll_sleep: int
-        """
         super(DataflowTemplateOperator, self).__init__(*args, **kwargs)
 
         dataflow_default_options = dataflow_default_options or {}
@@ -253,7 +251,7 @@ def execute(self, context):
 
 class DataFlowPythonOperator(BaseOperator):
     """
-    Create a new DataFlowPythonOperator. Note that both
+    Launching Cloud Dataflow jobs written in python. Note that both
     dataflow_default_options and options will be merged to specify pipeline
     execution parameter, and dataflow_default_options is expected to save
     high-level options, for instances, project and zone information, which
@@ -284,7 +282,6 @@ class DataFlowPythonOperator(BaseOperator):
         JOB_STATE_RUNNING state.
     :type poll_sleep: int
     """
-
     template_fields = ['options', 'dataflow_default_options']
 
     @apply_defaults
diff --git a/airflow/contrib/operators/dataproc_operator.py b/airflow/contrib/operators/dataproc_operator.py
index 4b0cd899f0..5ce2ff1964 100644
--- a/airflow/contrib/operators/dataproc_operator.py
+++ b/airflow/contrib/operators/dataproc_operator.py
@@ -70,8 +70,7 @@ class DataprocClusterCreateOperator(BaseOperator):
     :type image_version: string
     :param properties: dict of properties to set on
         config files (e.g. spark-defaults.conf), see
-        https://cloud.google.com/dataproc/docs/reference/rest/v1/ \
-        projects.regions.clusters#SoftwareConfig
+        https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig
     :type properties: dict
     :param master_machine_type: Compute engine machine type to use for the master node
     :type master_machine_type: string
diff --git a/airflow/contrib/operators/emr_add_steps_operator.py b/airflow/contrib/operators/emr_add_steps_operator.py
index 2f2f4afcec..959543e617 100644
--- a/airflow/contrib/operators/emr_add_steps_operator.py
+++ b/airflow/contrib/operators/emr_add_steps_operator.py
@@ -27,7 +27,7 @@ class EmrAddStepsOperator(BaseOperator):
     An operator that adds steps to an existing EMR job_flow.
 
     :param job_flow_id: id of the JobFlow to add steps to. (templated)
-    :type job_flow_name: str
+    :type job_flow_id: str
     :param aws_conn_id: aws connection to uses
     :type aws_conn_id: str
     :param steps: boto3 style steps to be added to the jobflow. (templated)
diff --git a/airflow/contrib/operators/emr_create_job_flow_operator.py b/airflow/contrib/operators/emr_create_job_flow_operator.py
index 1e0dfc386f..42886d6006 100644
--- a/airflow/contrib/operators/emr_create_job_flow_operator.py
+++ b/airflow/contrib/operators/emr_create_job_flow_operator.py
@@ -33,7 +33,7 @@ class EmrCreateJobFlowOperator(BaseOperator):
     :param emr_conn_id: emr connection to use
     :type emr_conn_id: str
     :param job_flow_overrides: boto3 style arguments to override
-           emr_connection extra. (templated)
+       emr_connection extra. (templated)
     :type steps: dict
     """
     template_fields = ['job_flow_overrides']
diff --git a/airflow/contrib/operators/emr_terminate_job_flow_operator.py b/airflow/contrib/operators/emr_terminate_job_flow_operator.py
index d265733842..8e4c0687cc 100644
--- a/airflow/contrib/operators/emr_terminate_job_flow_operator.py
+++ b/airflow/contrib/operators/emr_terminate_job_flow_operator.py
@@ -27,7 +27,7 @@ class EmrTerminateJobFlowOperator(BaseOperator):
     Operator to terminate EMR JobFlows.
 
     :param job_flow_id: id of the JobFlow to terminate. (templated)
-    :type job_flow_name: str
+    :type job_flow_id: str
     :param aws_conn_id: aws connection to uses
     :type aws_conn_id: str
     """
diff --git a/airflow/contrib/operators/gcp_container_operator.py b/airflow/contrib/operators/gcp_container_operator.py
index c99f2a93f2..fda4d44b9d 100644
--- a/airflow/contrib/operators/gcp_container_operator.py
+++ b/airflow/contrib/operators/gcp_container_operator.py
@@ -29,6 +29,36 @@
 
 
 class GKEClusterDeleteOperator(BaseOperator):
+    """
+    Deletes the cluster, including the Kubernetes endpoint and all worker nodes.
+
+    To delete a certain cluster, you must specify the ``project_id``, the ``name``
+    of the cluster, the ``location`` that the cluster is in, and the ``task_id``.
+
+    **Operator Creation**: ::
+
+        operator = GKEClusterDeleteOperator(
+                    task_id='cluster_delete',
+                    project_id='my-project',
+                    location='cluster-location'
+                    name='cluster-name')
+
+    .. seealso::
+        For more detail about deleting clusters have a look at the reference:
+        https://google-cloud-python.readthedocs.io/en/latest/container/gapic/v1/api.html#google.cloud.container_v1.ClusterManagerClient.delete_cluster
+
+    :param project_id: The Google Developers Console [project ID or project number]
+    :type project_id: str
+    :param name: The name of the resource to delete, in this case cluster name
+    :type name: str
+    :param location: The name of the Google Compute Engine zone in which the cluster
+        resides.
+    :type location: str
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform.
+    :type gcp_conn_id: str
+    :param api_version: The api version to use
+    :type api_version: str
+    """
     template_fields = ['project_id', 'gcp_conn_id', 'name', 'location', 'api_version']
 
     @apply_defaults
@@ -40,37 +70,6 @@ def __init__(self,
                  api_version='v2',
                  *args,
                  **kwargs):
-        """
-        Deletes the cluster, including the Kubernetes endpoint and all worker nodes.
-
-
-        To delete a certain cluster, you must specify the ``project_id``, the ``name``
-        of the cluster, the ``location`` that the cluster is in, and the ``task_id``.
-
-        **Operator Creation**: ::
-
-            operator = GKEClusterDeleteOperator(
-                        task_id='cluster_delete',
-                        project_id='my-project',
-                        location='cluster-location'
-                        name='cluster-name')
-
-        .. seealso::
-            For more detail about deleting clusters have a look at the reference:
-            https://google-cloud-python.readthedocs.io/en/latest/container/gapic/v1/api.html#google.cloud.container_v1.ClusterManagerClient.delete_cluster
-
-        :param project_id: The Google Developers Console [project ID or project number]
-        :type project_id: str
-        :param name: The name of the resource to delete, in this case cluster name
-        :type name: str
-        :param location: The name of the Google Compute Engine zone in which the cluster
-            resides.
-        :type location: str
-        :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform.
-        :type gcp_conn_id: str
-        :param api_version: The api version to use
-        :type api_version: str
-        """
         super(GKEClusterDeleteOperator, self).__init__(*args, **kwargs)
 
         self.project_id = project_id
@@ -93,6 +92,48 @@ def execute(self, context):
 
 
 class GKEClusterCreateOperator(BaseOperator):
+    """
+    Create a Google Kubernetes Engine Cluster of specified dimensions
+    The operator will wait until the cluster is created.
+
+    The **minimum** required to define a cluster to create is:
+
+    ``dict()`` ::
+        cluster_def = {'name': 'my-cluster-name',
+                       'initial_node_count': 1}
+
+    or
+
+    ``Cluster`` proto ::
+        from google.cloud.container_v1.types import Cluster
+
+        cluster_def = Cluster(name='my-cluster-name', initial_node_count=1)
+
+    **Operator Creation**: ::
+
+        operator = GKEClusterCreateOperator(
+                    task_id='cluster_create',
+                    project_id='my-project',
+                    location='my-location'
+                    body=cluster_def)
+
+    .. seealso::
+        For more detail on about creating clusters have a look at the reference:
+        https://google-cloud-python.readthedocs.io/en/latest/container/gapic/v1/types.html#google.cloud.container_v1.types.Cluster
+
+    :param project_id: The Google Developers Console [project ID or project number]
+    :type project_id: str
+    :param location: The name of the Google Compute Engine zone in which the cluster
+        resides.
+    :type location: str
+    :param body: The Cluster definition to create, can be protobuf or python dict, if
+        dict it must match protobuf message Cluster
+    :type body: dict or google.cloud.container_v1.types.Cluster
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform.
+    :type gcp_conn_id: str
+    :param api_version: The api version to use
+    :type api_version: str
+    """
     template_fields = ['project_id', 'gcp_conn_id', 'location', 'api_version', 'body']
 
     @apply_defaults
@@ -104,48 +145,6 @@ def __init__(self,
                  api_version='v2',
                  *args,
                  **kwargs):
-        """
-        Create a Google Kubernetes Engine Cluster of specified dimensions
-        The operator will wait until the cluster is created.
-
-        The **minimum** required to define a cluster to create is:
-
-        ``dict()`` ::
-            cluster_def = {'name': 'my-cluster-name',
-                           'initial_node_count': 1}
-
-        or
-
-        ``Cluster`` proto ::
-            from google.cloud.container_v1.types import Cluster
-
-            cluster_def = Cluster(name='my-cluster-name', initial_node_count=1)
-
-        **Operator Creation**: ::
-
-            operator = GKEClusterCreateOperator(
-                        task_id='cluster_create',
-                        project_id='my-project',
-                        location='my-location'
-                        body=cluster_def)
-
-        .. seealso::
-            For more detail on about creating clusters have a look at the reference:
-            https://google-cloud-python.readthedocs.io/en/latest/container/gapic/v1/types.html#google.cloud.container_v1.types.Cluster
-
-        :param project_id: The Google Developers Console [project ID or project number]
-        :type project_id: str
-        :param location: The name of the Google Compute Engine zone in which the cluster
-            resides.
-        :type location: str
-        :param body: The Cluster definition to create, can be protobuf or python dict, if
-            dict it must match protobuf message Cluster
-        :type body: dict or google.cloud.container_v1.types.Cluster
-        :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform.
-        :type gcp_conn_id: str
-        :param api_version: The api version to use
-        :type api_version: str
-        """
         super(GKEClusterCreateOperator, self).__init__(*args, **kwargs)
 
         if body is None:
@@ -184,6 +183,44 @@ def execute(self, context):
 
 
 class GKEPodOperator(KubernetesPodOperator):
+    """
+    Executes a task in a Kubernetes pod in the specified Google Kubernetes
+    Engine cluster
+
+    This Operator assumes that the system has gcloud installed and either
+    has working default application credentials or has configured a
+    connection id with a service account.
+
+    The **minimum** required to define a cluster to create are the variables
+    ``task_id``, ``project_id``, ``location``, ``cluster_name``, ``name``,
+    ``namespace``, and ``image``
+
+    **Operator Creation**: ::
+
+        operator = GKEPodOperator(task_id='pod_op',
+                                  project_id='my-project',
+                                  location='us-central1-a',
+                                  cluster_name='my-cluster-name',
+                                  name='task-name',
+                                  namespace='default',
+                                  image='perl')
+
+    .. seealso::
+        For more detail about application authentication have a look at the reference:
+        https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application
+
+    :param project_id: The Google Developers Console project id
+    :type project_id: str
+    :param location: The name of the Google Kubernetes Engine zone in which the
+        cluster resides, e.g. 'us-central1-a'
+    :type location: str
+    :param cluster_name: The name of the Google Kubernetes Engine cluster the pod
+        should be spawned in
+    :type cluster_name: str
+    :param gcp_conn_id: The google cloud connection id to use. This allows for
+        users to specify a service account.
+    :type gcp_conn_id: str
+    """
     template_fields = ('project_id', 'location',
                        'cluster_name') + KubernetesPodOperator.template_fields
 
@@ -195,44 +232,6 @@ def __init__(self,
                  gcp_conn_id='google_cloud_default',
                  *args,
                  **kwargs):
-        """
-        Executes a task in a Kubernetes pod in the specified Google Kubernetes
-        Engine cluster
-
-        This Operator assumes that the system has gcloud installed and either
-        has working default application credentials or has configured a
-        connection id with a service account.
-
-        The **minimum** required to define a cluster to create are the variables
-        ``task_id``, ``project_id``, ``location``, ``cluster_name``, ``name``,
-        ``namespace``, and ``image``
-
-        **Operator Creation**: ::
-
-            operator = GKEPodOperator(task_id='pod_op',
-                                      project_id='my-project',
-                                      location='us-central1-a',
-                                      cluster_name='my-cluster-name',
-                                      name='task-name',
-                                      namespace='default',
-                                      image='perl')
-
-        .. seealso::
-            For more detail about application authentication have a look at the reference:
-            https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application
-
-        :param project_id: The Google Developers Console project id
-        :type project_id: str
-        :param location: The name of the Google Kubernetes Engine zone in which the
-            cluster resides, e.g. 'us-central1-a'
-        :type location: str
-        :param cluster_name: The name of the Google Kubernetes Engine cluster the pod
-            should be spawned in
-        :type cluster_name: str
-        :param gcp_conn_id: The google cloud connection id to use. This allows for
-            users to specify a service account.
-        :type gcp_conn_id: str
-        """
         super(GKEPodOperator, self).__init__(*args, **kwargs)
         self.project_id = project_id
         self.location = location
diff --git a/airflow/contrib/operators/gcs_to_bq.py b/airflow/contrib/operators/gcs_to_bq.py
index 69acb61659..40b6d2381f 100644
--- a/airflow/contrib/operators/gcs_to_bq.py
+++ b/airflow/contrib/operators/gcs_to_bq.py
@@ -49,7 +49,7 @@ class GoogleCloudStorageToBigQueryOperator(BaseOperator):
     :type schema_fields: list
     :param schema_object: If set, a GCS object path pointing to a .json file that
         contains the schema for the table. (templated)
-    :param schema_object: string
+    :type schema_object: string
     :param source_format: File format to export.
     :type source_format: string
     :param compression: [Optional] The compression type of the data source.
@@ -78,7 +78,7 @@ class GoogleCloudStorageToBigQueryOperator(BaseOperator):
         invalid error is returned in the job result.
     :type ignore_unknown_values: bool
     :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false).
-    :type allow_quoted_newlines: boolean
+    :type allow_quoted_newlines: bool
     :param allow_jagged_rows: Accept rows that are missing trailing optional columns.
         The missing values are treated as nulls. If false, records with missing trailing
         columns are treated as bad records, and if there are too many bad records, an
diff --git a/airflow/contrib/operators/gcs_to_gcs.py b/airflow/contrib/operators/gcs_to_gcs.py
index 256685f90b..523240c909 100644
--- a/airflow/contrib/operators/gcs_to_gcs.py
+++ b/airflow/contrib/operators/gcs_to_gcs.py
@@ -38,7 +38,7 @@ class GoogleCloudStorageToGoogleCloudStorageOperator(BaseOperator):
             unsupported.
     :type source_object: string
     :param destination_bucket: The destination Google cloud storage bucket
-    where the object should be. (templated)
+        where the object should be. (templated)
     :type destination_bucket: string
     :param destination_object: The destination name of the object in the
         destination Google cloud storage bucket. (templated)
@@ -46,15 +46,14 @@ class GoogleCloudStorageToGoogleCloudStorageOperator(BaseOperator):
         prefix that will be prepended to the final destination objects' paths.
         Note that the source path's part before the wildcard will be removed;
         if it needs to be retained it should be appended to destination_object.
-        For example, with prefix ``foo/*`` and destination_object `'blah/``, the
+        For example, with prefix ``foo/*`` and destination_object ``blah/``, the
         file ``foo/baz`` will be copied to ``blah/baz``; to retain the prefix write
         the destination_object as e.g. ``blah/foo``, in which case the copied file
         will be named ``blah/foo/baz``.
     :type destination_object: string
     :param move_object: When move object is True, the object is moved instead
-    of copied to the new location.
-                        This is the equivalent of a mv command as opposed to a
-                        cp command.
+        of copied to the new location. This is the equivalent of a mv command
+        as opposed to a cp command.
     :type move_object: bool
     :param google_cloud_storage_conn_id: The connection ID to use when
         connecting to Google cloud storage.
diff --git a/airflow/contrib/operators/mlengine_operator.py b/airflow/contrib/operators/mlengine_operator.py
index 8e75b3c608..2e2cfb4fe9 100644
--- a/airflow/contrib/operators/mlengine_operator.py
+++ b/airflow/contrib/operators/mlengine_operator.py
@@ -282,7 +282,6 @@ class MLEngineModelOperator(BaseOperator):
     :param project_id: The Google Cloud project name to which MLEngine
         model belongs. (templated)
     :type project_id: string
-
     :param model: A dictionary containing the information about the model.
         If the `operation` is `create`, then the `model` parameter should
         contain all the information about this model such as `name`.
@@ -290,15 +289,13 @@ class MLEngineModelOperator(BaseOperator):
         If the `operation` is `get`, the `model` parameter
         should contain the `name` of the model.
     :type model: dict
-
     :param operation: The operation to perform. Available operations are:
 
         * ``create``: Creates a new model as provided by the `model` parameter.
         * ``get``: Gets a particular model where the name is specified in `model`.
-
+    :type operation: string
     :param gcp_conn_id: The connection ID to use when fetching connection info.
     :type gcp_conn_id: string
-
     :param delegate_to: The account to impersonate, if any.
         For this to work, the service account making the request must have
         domain-wide delegation enabled.
diff --git a/airflow/contrib/operators/s3_to_gcs_operator.py b/airflow/contrib/operators/s3_to_gcs_operator.py
index 81c48a9e15..35bd1f9371 100644
--- a/airflow/contrib/operators/s3_to_gcs_operator.py
+++ b/airflow/contrib/operators/s3_to_gcs_operator.py
@@ -67,7 +67,9 @@ class S3ToGoogleCloudStorageOperator(S3ListOperator):
 
 
     **Example**:
+
     .. code-block:: python
+
        s3_to_gcs_op = S3ToGoogleCloudStorageOperator(
             task_id='s3_to_gcs_example',
             bucket='my-s3-bucket',
diff --git a/airflow/operators/oracle_operator.py b/airflow/operators/oracle_operator.py
index 84820c0790..275165f1c7 100644
--- a/airflow/operators/oracle_operator.py
+++ b/airflow/operators/oracle_operator.py
@@ -25,6 +25,7 @@
 class OracleOperator(BaseOperator):
     """
     Executes sql code in a specific Oracle database
+
     :param oracle_conn_id: reference to a specific Oracle database
     :type oracle_conn_id: string
     :param sql: the sql code to be executed. (templated)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Fix Documentation
> -----------------
>
>                 Key: AIRFLOW-3018
>                 URL: https://issues.apache.org/jira/browse/AIRFLOW-3018
>             Project: Apache Airflow
>          Issue Type: Improvement
>          Components: docs, Documentation
>            Reporter: Kaxil Naik
>            Assignee: Kaxil Naik
>            Priority: Minor
>
> Documentation contains lots of hooks and operators with misspelled or missing types and
> wrongly formatted text



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


Mime
View raw message