airflow-commits mailing list archives

From maximebeauche...@apache.org
Subject [06/34] incubator-airflow-site git commit: Initial commit
Date Sun, 05 Jun 2016 05:23:56 GMT
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/code.html
----------------------------------------------------------------------
diff --git a/code.html b/code.html
new file mode 100644
index 0000000..4bf307d
--- /dev/null
+++ b/code.html
@@ -0,0 +1,3517 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>API Reference &mdash; Airflow Documentation</title>
+  
+
+  
+  
+
+  
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
+  
+
+  
+
+  
+    <link rel="top" title="Airflow Documentation" href="index.html"/>
+        <link rel="prev" title="FAQ" href="faq.html"/> 
+
+  
+  <script src="_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-scroll">
+        <div class="wy-side-nav-search">
+          
+
+          
+            <a href="index.html" class="icon icon-home"> Airflow
+          
+
+          
+          </a>
+
+          
+            
+            
+          
+
+          
+<div role="search">
+  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+
+          
+        </div>
+
+        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+          
+            
+            
+                <ul class="current">
+<li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li>
+<li class="toctree-l1 current"><a class="current reference internal" href="#">API Reference</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="#operators">Operators</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="#baseoperator">BaseOperator</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#basesensoroperator">BaseSensorOperator</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#module-airflow.operators">Operator API</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#community-contributed-operators">Community-contributed Operators</a></li>
+</ul>
+</li>
+<li class="toctree-l2"><a class="reference internal" href="#macros">Macros</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="#default-variables">Default Variables</a></li>
+<li class="toctree-l3"><a class="reference internal" href="#id2">Macros</a></li>
+</ul>
+</li>
+<li class="toctree-l2"><a class="reference internal" href="#models">Models</a></li>
+<li class="toctree-l2"><a class="reference internal" href="#module-airflow.hooks">Hooks</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="#community-contributed-hooks">Community contributed hooks</a></li>
+</ul>
+</li>
+<li class="toctree-l2"><a class="reference internal" href="#executors">Executors</a><ul>
+<li class="toctree-l3"><a class="reference internal" href="#community-contributed-executors">Community-contributed executors</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+            
+          
+        </div>
+      </div>
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="index.html">Airflow</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          
+
+ 
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="index.html">Docs</a> &raquo;</li>
+      
+    <li>API Reference</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          
+            <a href="_sources/code.txt" rel="nofollow"> View page source</a>
+          
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+            
+  <div class="section" id="api-reference">
+<h1>API Reference<a class="headerlink" href="#api-reference" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="operators">
+<h2>Operators<a class="headerlink" href="#operators" title="Permalink to this headline">¶</a></h2>
+<p>Operators allow for generation of certain types of tasks that become nodes in
+the DAG when instantiated. All operators derive from BaseOperator and
+inherit many attributes and methods that way. Refer to the BaseOperator
+documentation for more details.</p>
+<p>There are 3 main types of operators:</p>
+<ul class="simple">
+<li>Operators that perform an <strong>action</strong>, or tell another system to
+perform an action</li>
+<li><strong>Transfer</strong> operators move data from one system to another</li>
+<li><strong>Sensors</strong> are a certain type of operator that will keep running until a
+certain criterion is met. Examples include a specific file landing in HDFS or
+S3, a partition appearing in Hive, or a specific time of the day. Sensors
+are derived from <code class="docutils literal"><span class="pre">BaseSensorOperator</span></code> and run a poke
+method at a specified <code class="docutils literal"><span class="pre">poke_interval</span></code> until it returns <code class="docutils literal"><span class="pre">True</span></code>.</li>
+</ul>
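+<p>For illustration, here is a minimal sketch wiring a sensor to an action
+operator. It assumes a <code class="docutils literal"><span class="pre">dag</span></code> object is already in scope; the file path
+is a placeholder:</p>
+<div class="highlight-python"><div class="highlight"><pre># A sketch, not a complete DAG file: `dag` is assumed to exist.
+from airflow.operators import BashOperator, HdfsSensor
+
+wait_for_file = HdfsSensor(
+    task_id='wait_for_file',
+    filepath='/data/landing/{{ ds }}/input.csv',  # hypothetical path
+    poke_interval=60,
+    dag=dag)
+
+run_report = BashOperator(
+    task_id='run_report',
+    bash_command='echo "building report for {{ ds }}"',
+    dag=dag)
+
+# The sensor gates the downstream action task.
+wait_for_file.set_downstream(run_report)
+</pre></div>
+</div>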
+<div class="section" id="baseoperator">
+<h3>BaseOperator<a class="headerlink" href="#baseoperator" title="Permalink to this headline">¶</a></h3>
+<p>All operators are derived from <code class="docutils literal"><span class="pre">BaseOperator</span></code> and acquire much
+functionality through inheritance. Since this is the core of the engine,
+it&#8217;s worth taking the time to understand the parameters of <code class="docutils literal"><span class="pre">BaseOperator</span></code>
+to understand the primitive features that can be leveraged in your
+DAGs.</p>
+<dl class="class">
+<dt id="airflow.models.BaseOperator">
+<em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">BaseOperator</code><span class="sig-paren">(</span><em>task_id</em>, <em>owner='airflow'</em>, <em>email=None</em>, <em>email_on_retry=True</em>, <em>email_on_failure=True</em>, <em>retries=0</em>, <em>retry_delay=datetime.timedelta(0</em>, <em>300)</em>, <em>start_date=None</em>, <em>end_date=None</em>, <em>schedule_interval=None</em>, <em>depends_on_past=False</em>, <em>wait_for_downstream=False</em>, <em>dag=None</em>, <em>params=None</em>, <em>default_args=None</em>, <em>adhoc=False</em>, <em>priority_weight=1</em>, <em>queue='default'</em>, <em>pool=None</em>, <em>sla=None</em>, <em>execution_timeout=None</em>, <em>on_failure_callback=None</em>, <em>on_success_callback=None</em>, <em>on_retry_callback=None</em>, <em>trigger_rule=u'all_success'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.ht
 ml#BaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Abstract base class for all operators. Since operators create objects that
+become nodes in the DAG, BaseOperator contains many recursive methods for
+DAG crawling behavior. To derive from this class, you are expected to override
+the constructor as well as the &#8216;execute&#8217; method.</p>
+<p>Operators derived from this class should perform or trigger certain tasks
+synchronously (wait for completion). Examples of operators could be an
+operator that runs a Pig job (PigOperator), a sensor operator that
+waits for a partition to land in Hive (HiveSensorOperator), or one that
+moves data from Hive to MySQL (Hive2MySqlOperator). Instances of these
+operators (tasks) target specific operations, running specific scripts,
+functions or data transfers.</p>
+<p>This class is abstract and shouldn&#8217;t be instantiated. Instantiating a
+class derived from this one results in the creation of a task object,
+which ultimately becomes a node in DAG objects. Task dependencies should
+be set by using the set_upstream and/or set_downstream methods.</p>
+<p>Note that this class is derived from SQLAlchemy&#8217;s Base class, which
+allows us to push metadata regarding tasks to the database. Classes deriving
+from this one need to implement the polymorphic specificities documented in
+SQLAlchemy. This should become clear while reading the code for other
+operators.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>task_id</strong> (<em>string</em>) &#8211; a unique, meaningful id for the task</li>
+<li><strong>owner</strong> (<em>string</em>) &#8211; the owner of the task, using the unix username is recommended</li>
+<li><strong>retries</strong> (<em>int</em>) &#8211; the number of retries that should be performed before
+failing the task</li>
+<li><strong>retry_delay</strong> (<em>timedelta</em>) &#8211; delay between retries</li>
+<li><strong>start_date</strong> (<em>datetime</em>) &#8211; The <code class="docutils literal"><span class="pre">start_date</span></code> for the task, determines
+the <code class="docutils literal"><span class="pre">execution_date</span></code> for the first task instance. The best practice
+is to have the start_date rounded
+to your DAG&#8217;s <code class="docutils literal"><span class="pre">schedule_interval</span></code>. Daily jobs have their start_date
+some day at 00:00:00, hourly jobs have their start_date at 00:00
+of a specific hour. Note that Airflow simply looks at the latest
+<code class="docutils literal"><span class="pre">execution_date</span></code> and adds the <code class="docutils literal"><span class="pre">schedule_interval</span></code> to determine
+the next <code class="docutils literal"><span class="pre">execution_date</span></code>. It is also very important
+to note that different tasks&#8217; dependencies
+need to line up in time. If task A depends on task B and their
+start_date are offset in a way that their execution_date don&#8217;t line
+up, A&#8217;s dependencies will never be met. If you are looking to delay
+a task, for example running a daily task at 2AM, look into the
+<code class="docutils literal"><span class="pre">TimeSensor</span></code> and <code class="docutils literal"><span class="pre">TimeDeltaSensor</span></code>. We advise against using
+dynamic <code class="docutils literal"><span class="pre">start_date</span></code> and recommend using fixed ones. Read the
+FAQ entry about start_date for more information.</li>
+<li><strong>end_date</strong> (<em>datetime</em>) &#8211; if specified, the scheduler won&#8217;t go beyond this date</li>
+<li><strong>depends_on_past</strong> (<em>bool</em>) &#8211; when set to true, task instances will run
+sequentially, relying on the previous task instance to
+succeed. The task instance for the start_date is allowed to run.</li>
+<li><strong>wait_for_downstream</strong> (<em>bool</em>) &#8211; when set to true, an instance of task
+X will wait for tasks immediately downstream of the previous instance
+of task X to finish successfully before it runs. This is useful if the
+different instances of a task X alter the same asset, and this asset
+is used by tasks downstream of task X. Note that depends_on_past
+is forced to True wherever wait_for_downstream is used.</li>
+<li><strong>queue</strong> (<em>str</em>) &#8211; which queue to target when running this job. Not
+all executors implement queue management; the CeleryExecutor
+does support targeting specific queues.</li>
+<li><strong>dag</strong> (<a class="reference internal" href="#airflow.models.DAG" title="airflow.models.DAG"><em>DAG</em></a>) &#8211; a reference to the dag the task is attached to (if any)</li>
+<li><strong>priority_weight</strong> (<em>int</em>) &#8211; priority weight of this task against other tasks.
+This allows the executor to trigger higher priority tasks before
+others when things get backed up.</li>
+<li><strong>pool</strong> (<em>str</em>) &#8211; the slot pool this task should run in, slot pools are a
+way to limit concurrency for certain tasks</li>
+<li><strong>sla</strong> (<em>datetime.timedelta</em>) &#8211; time by which the job is expected to succeed. Note that
+this represents the <code class="docutils literal"><span class="pre">timedelta</span></code> after the period is closed. For
+example if you set an SLA of 1 hour, the scheduler would send an email
+soon after 1:00AM on the <code class="docutils literal"><span class="pre">2016-01-02</span></code> if the <code class="docutils literal"><span class="pre">2016-01-01</span></code> instance
+has not succeeded yet.
+The scheduler pays special attention for jobs with an SLA and
+sends alert
+emails for sla misses. SLA misses are also recorded in the database
+for future reference. All tasks that share the same SLA time
+get bundled in a single email, sent soon after that time. SLA
+notifications are sent once and only once for each task instance.</li>
+<li><strong>execution_timeout</strong> (<em>datetime.timedelta</em>) &#8211; max time allowed for the execution of
+this task instance, if it goes beyond it will raise and fail.</li>
+<li><strong>on_failure_callback</strong> (<em>callable</em>) &#8211; a function to be called when a task instance
+of this task fails. A context dictionary is passed as a single
+parameter to this function. Context contains references to related
+objects to the task instance and is documented under the macros
+section of the API.</li>
+<li><strong>on_retry_callback</strong> &#8211; much like the <code class="docutils literal"><span class="pre">on_failure_callback</span></code> except
+that it is executed when retries occur.</li>
+<li><strong>on_success_callback</strong> (<em>callable</em>) &#8211; much like the <code class="docutils literal"><span class="pre">on_failure_callback</span></code> except
+that it is executed when the task succeeds.</li>
+<li><strong>trigger_rule</strong> (<em>str</em>) &#8211; defines the rule by which dependencies are applied
+for the task to get triggered. Options are:
+<code class="docutils literal"><span class="pre">{</span> <span class="pre">all_success</span> <span class="pre">|</span> <span class="pre">all_failed</span> <span class="pre">|</span> <span class="pre">all_done</span> <span class="pre">|</span> <span class="pre">one_success</span> <span class="pre">|</span>
+<span class="pre">one_failed</span> <span class="pre">|</span> <span class="pre">dummy}</span></code>
+default is <code class="docutils literal"><span class="pre">all_success</span></code>. Options can be set as string or
+using the constants defined in the static class
+<code class="docutils literal"><span class="pre">airflow.utils.TriggerRule</span></code></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
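+<p>As an illustration, a sketch showing some of these parameters in use
+(assuming a <code class="docutils literal"><span class="pre">dag</span></code> object exists; the task itself is a placeholder):</p>
+<div class="highlight-python"><div class="highlight"><pre>from datetime import datetime, timedelta
+
+from airflow.operators import DummyOperator
+
+final_step = DummyOperator(
+    task_id='final_step',
+    owner='airflow',
+    retries=3,                          # retry up to 3 times...
+    retry_delay=timedelta(minutes=5),   # ...waiting 5 minutes in between
+    start_date=datetime(2016, 1, 1),    # fixed, rounded to the schedule
+    sla=timedelta(hours=1),             # alert if not done 1h after the period
+    trigger_rule='one_success',         # fire as soon as one upstream succeeds
+    dag=dag)
+</pre></div>
+</div>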
+</div>
+<div class="section" id="basesensoroperator">
+<h3>BaseSensorOperator<a class="headerlink" href="#basesensoroperator" title="Permalink to this headline">¶</a></h3>
+<p>All sensors are derived from <code class="docutils literal"><span class="pre">BaseSensorOperator</span></code>. All sensors inherit
+the <code class="docutils literal"><span class="pre">timeout</span></code> and <code class="docutils literal"><span class="pre">poke_interval</span></code> on top of the <code class="docutils literal"><span class="pre">BaseOperator</span></code>
+attributes.</p>
+<dl class="class">
+<dt id="airflow.operators.sensors.BaseSensorOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.sensors.</code><code class="descname">BaseSensorOperator</code><span class="sig-paren">(</span><em>poke_interval=60</em>, <em>timeout=604800</em>, <em>soft_fail=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/sensors.html#BaseSensorOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.sensors.BaseSensorOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Sensor operators are derived from this class and inherit these attributes.</p>
+<dl class="docutils">
+<dt>Sensor operators keep executing at a time interval and succeed when</dt>
+<dd>a criteria is met and fail if and when they time out.</dd>
+</dl>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>soft_fail</strong> (<em>bool</em>) &#8211; Set to true to mark the task as SKIPPED on failure</li>
+<li><strong>poke_interval</strong> (<em>int</em>) &#8211; Time in seconds that the job should wait in
+between each try</li>
+<li><strong>timeout</strong> (<em>int</em>) &#8211; Time, in seconds, before the task times out and fails.</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
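+<p>A sketch of a custom sensor, assuming only what is documented above: the
+class name and check are hypothetical, and the scheduler calls <code class="docutils literal"><span class="pre">poke</span></code>
+every <code class="docutils literal"><span class="pre">poke_interval</span></code> seconds until it returns True:</p>
+<div class="highlight-python"><div class="highlight"><pre>import os
+
+from airflow.operators.sensors import BaseSensorOperator
+
+
+class FileSizeSensor(BaseSensorOperator):
+    """Hypothetical sensor: waits for a local file to reach a minimum size."""
+
+    def __init__(self, filepath, min_bytes, *args, **kwargs):
+        super(FileSizeSensor, self).__init__(*args, **kwargs)
+        self.filepath = filepath
+        self.min_bytes = min_bytes
+
+    def poke(self, context):
+        # Called repeatedly at `poke_interval`; True means the criterion is met.
+        return (os.path.isfile(self.filepath) and
+                os.path.getsize(self.filepath) &gt;= self.min_bytes)
+</pre></div>
+</div>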
+</div>
+<div class="section" id="module-airflow.operators">
+<span id="operator-api"></span><h3>Operator API<a class="headerlink" href="#module-airflow.operators" title="Permalink to this headline">¶</a></h3>
+<dl class="class">
+<dt id="airflow.operators.BashOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">BashOperator</code><span class="sig-paren">(</span><em>bash_command</em>, <em>xcom_push=False</em>, <em>env=None</em>, <em>output_encoding='utf-8'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/bash_operator.html#BashOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.BashOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Execute a Bash script, command or set of commands.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>bash_command</strong> (<em>string</em>) &#8211; The command, set of commands or reference to a
+bash script (must be &#8216;.sh&#8217;) to be executed.</li>
+<li><strong>env</strong> (<em>dict</em>) &#8211; If env is not None, it must be a mapping that defines the
+environment variables for the new process; these are used instead
+of inheriting the current process environment, which is the default
+behavior. (templated)</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+<dl class="method">
+<dt id="airflow.operators.BashOperator.execute">
+<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/bash_operator.html#BashOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.BashOperator.execute" title="Permalink to this definition">¶</a></dt>
+<dd><p>Execute the bash command in a temporary directory
+which will be cleaned afterwards</p>
+</dd></dl>
+
+</dd></dl>
+
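+<p>For example, a sketch passing a templated environment variable to the
+command (assuming a <code class="docutils literal"><span class="pre">dag</span></code> object exists):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import BashOperator
+
+print_date = BashOperator(
+    task_id='print_exec_date',
+    bash_command='echo "execution date: $EXEC_DATE"',
+    env={'EXEC_DATE': '{{ ds }}'},  # `env` is templated
+    dag=dag)
+</pre></div>
+</div>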
+<dl class="class">
+<dt id="airflow.operators.BranchPythonOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">BranchPythonOperator</code><span class="sig-paren">(</span><em>python_callable</em>, <em>op_args=None</em>, <em>op_kwargs=None</em>, <em>provide_context=False</em>, <em>templates_dict=None</em>, <em>templates_exts=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/python_operator.html#BranchPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.BranchPythonOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">python_operator.PythonOperator</span></code></p>
+<p>Allows a workflow to &#8220;branch&#8221; or follow a single path following the
+execution of this task.</p>
+<p>It derives the PythonOperator and expects a Python function that returns
+the task_id to follow. The task_id returned should point to a task
+directly downstream from {self}. All other &#8220;branches&#8221; or
+directly downstream tasks are marked with a state of <code class="docutils literal"><span class="pre">skipped</span></code> so that
+these paths can&#8217;t move forward. The <code class="docutils literal"><span class="pre">skipped</span></code> states are propagated
+downstream to allow for the DAG state to fill up and the DAG run&#8217;s state
+to be inferred.</p>
+<p>Note that using tasks with <code class="docutils literal"><span class="pre">depends_on_past=True</span></code> downstream from
+<code class="docutils literal"><span class="pre">BranchPythonOperator</span></code> is logically unsound as <code class="docutils literal"><span class="pre">skipped</span></code> status
+will invariably block tasks that depend on their past successes. The
+<code class="docutils literal"><span class="pre">skipped</span></code> state propagates to where all directly upstream tasks are
+<code class="docutils literal"><span class="pre">skipped</span></code>.</p>
+</dd></dl>
+
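+<p>A sketch of a branching callable; the downstream tasks
+<code class="docutils literal"><span class="pre">weekday_task</span></code> and <code class="docutils literal"><span class="pre">weekend_task</span></code> are hypothetical and assumed to be
+defined elsewhere in the same DAG:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import BranchPythonOperator
+
+def choose_branch(**kwargs):
+    # Must return the task_id of the one branch to follow.
+    if kwargs['execution_date'].weekday() &lt; 5:
+        return 'weekday_task'
+    return 'weekend_task'
+
+branching = BranchPythonOperator(
+    task_id='branching',
+    python_callable=choose_branch,
+    provide_context=True,  # passes the context as keyword arguments
+    dag=dag)
+branching.set_downstream(weekday_task)
+branching.set_downstream(weekend_task)
+</pre></div>
+</div>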
+<dl class="class">
+<dt id="airflow.operators.TriggerDagRunOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">TriggerDagRunOperator</code><span class="sig-paren">(</span><em>trigger_dag_id</em>, <em>python_callable</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/dagrun_operator.html#TriggerDagRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.TriggerDagRunOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Triggers a DAG run for a specified <code class="docutils literal"><span class="pre">dag_id</span></code> if a criterion is met</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>trigger_dag_id</strong> (<em>str</em>) &#8211; the dag_id to trigger</li>
+<li><strong>python_callable</strong> (<em>python callable</em>) &#8211; a reference to a python function that will be
+called while passing it the <code class="docutils literal"><span class="pre">context</span></code> object and a placeholder
+object <code class="docutils literal"><span class="pre">obj</span></code> for your callable to fill and return if you want
+a DagRun created. This <code class="docutils literal"><span class="pre">obj</span></code> object contains a <code class="docutils literal"><span class="pre">run_id</span></code> and
+<code class="docutils literal"><span class="pre">payload</span></code> attribute that you can modify in your function.
+The <code class="docutils literal"><span class="pre">run_id</span></code> should be a unique identifier for that DAG run, and
+the payload has to be a picklable object that will be made available
+to your tasks while executing that DAG run. Your function header
+should look like <code class="docutils literal"><span class="pre">def</span> <span class="pre">foo(context,</span> <span class="pre">dag_run_obj):</span></code></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
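+<p>A sketch of the callable contract described above; the target
+<code class="docutils literal"><span class="pre">dag_id</span></code> and the condition are hypothetical:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import TriggerDagRunOperator
+
+def conditionally_trigger(context, dag_run_obj):
+    # Return the dag_run_obj to create the DagRun, or None to skip it.
+    if context['execution_date'].day == 1:  # hypothetical condition
+        dag_run_obj.payload = {'message': 'first of the month'}
+        return dag_run_obj
+
+trigger = TriggerDagRunOperator(
+    task_id='trigger_monthly_dag',
+    trigger_dag_id='monthly_summary',  # hypothetical dag_id
+    python_callable=conditionally_trigger,
+    dag=dag)
+</pre></div>
+</div>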
+<dl class="class">
+<dt id="airflow.operators.DummyOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">DummyOperator</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/dummy_operator.html#DummyOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.DummyOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Operator that does literally nothing. It can be used to group tasks in a
+DAG.</p>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.EmailOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">EmailOperator</code><span class="sig-paren">(</span><em>to</em>, <em>subject</em>, <em>html_content</em>, <em>files=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/email_operator.html#EmailOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.EmailOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Sends an email.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>to</strong> (<em>list or string (comma or semicolon delimited)</em>) &#8211; list of emails to send the email to</li>
+<li><strong>subject</strong> (<em>string</em>) &#8211; subject line for the email (templated)</li>
+<li><strong>html_content</strong> (<em>string</em>) &#8211; content of the email (templated), html markup
+is allowed</li>
+<li><strong>files</strong> (<em>list</em>) &#8211; file names to attach in email</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.ExternalTaskSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">ExternalTaskSensor</code><span class="sig-paren">(</span><em>external_dag_id</em>, <em>external_task_id</em>, <em>allowed_states=None</em>, <em>execution_delta=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#ExternalTaskSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.ExternalTaskSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Waits for a task to complete in a different DAG</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>external_dag_id</strong> (<em>string</em>) &#8211; The dag_id that contains the task you want to
+wait for</li>
+<li><strong>external_task_id</strong> (<em>string</em>) &#8211; The task_id of the task you want to
+wait for</li>
+<li><strong>allowed_states</strong> (<em>list</em>) &#8211; list of allowed states, default is <code class="docutils literal"><span class="pre">['success']</span></code></li>
+<li><strong>execution_delta</strong> (<em>datetime.timedelta</em>) &#8211; time difference with the previous execution to
+look at, the default is the same execution_date as the current task.
+For yesterday, use [positive!] datetime.timedelta(days=1)</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
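+<p>For instance, a sketch that waits on yesterday&#8217;s run of another DAG
+(DAG and task ids are hypothetical):</p>
+<div class="highlight-python"><div class="highlight"><pre>from datetime import timedelta
+
+from airflow.operators import ExternalTaskSensor
+
+wait_for_upstream = ExternalTaskSensor(
+    task_id='wait_for_upstream',
+    external_dag_id='upstream_dag',     # hypothetical DAG
+    external_task_id='final_task',      # hypothetical task
+    execution_delta=timedelta(days=1),  # look at yesterday's run
+    dag=dag)
+</pre></div>
+</div>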
+<dl class="class">
+<dt id="airflow.operators.GenericTransfer">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">GenericTransfer</code><span class="sig-paren">(</span><em>sql</em>, <em>destination_table</em>, <em>source_conn_id</em>, <em>destination_conn_id</em>, <em>preoperator=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/generic_transfer.html#GenericTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.GenericTransfer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Moves data from one connection to another, assuming that they both
+provide the required methods in their respective hooks. The source hook
+needs to expose a <cite>get_records</cite> method, and the destination an
+<cite>insert_rows</cite> method.</p>
+<p>This is meant to be used on small-ish datasets that fit in memory.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>str</em>) &#8211; SQL query to execute against the source database</li>
+<li><strong>destination_table</strong> (<em>str</em>) &#8211; target table</li>
+<li><strong>source_conn_id</strong> (<em>str</em>) &#8211; source connection</li>
+<li><strong>destination_conn_id</strong> (<em>str</em>) &#8211; destination connection</li>
+<li><strong>preoperator</strong> (<em>str or list of str</em>) &#8211; sql statement or list of statements to be
+executed prior to loading the data</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.HdfsSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">HdfsSensor</code><span class="sig-paren">(</span><em>filepath</em>, <em>hdfs_conn_id='hdfs_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#HdfsSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.HdfsSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Waits for a file or folder to land in HDFS</p>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.Hive2SambaOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">Hive2SambaOperator</code><span class="sig-paren">(</span><em>hql</em>, <em>destination_filepath</em>, <em>samba_conn_id='samba_default'</em>, <em>hiveserver2_conn_id='hiveserver2_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/hive_to_samba_operator.html#Hive2SambaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.Hive2SambaOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Executes hql code in a specific Hive database and loads the
+results of the query as a csv to a Samba location.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>hql</strong> (<em>string</em>) &#8211; the hql to be exported</li>
+<li><strong>hiveserver2_conn_id</strong> (<em>string</em>) &#8211; reference to the hiveserver2 service</li>
+<li><strong>samba_conn_id</strong> (<em>string</em>) &#8211; reference to the samba destination</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.HiveOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">HiveOperator</code><span class="sig-paren">(</span><em>hql</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>schema='default'</em>, <em>hiveconf_jinja_translate=False</em>, <em>script_begin_tag=None</em>, <em>run_as_owner=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/hive_operator.html#HiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.HiveOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Executes hql code in a specific Hive database.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>hql</strong> (<em>string</em>) &#8211; the hql to be executed</li>
+<li><strong>hive_cli_conn_id</strong> (<em>string</em>) &#8211; reference to the Hive database</li>
+<li><strong>hiveconf_jinja_translate</strong> (<em>boolean</em>) &#8211; when True, hiveconf-type templating
+${var} gets translated into jinja-type templating {{ var }}. Note that
+you may want to use this along with the
+<code class="docutils literal"><span class="pre">DAG(user_defined_macros=myargs)</span></code> parameter. View the DAG
+object documentation for more details.</li>
+<li><strong>script_begin_tag</strong> (<em>str</em>) &#8211; If defined, the operator will get rid of the
+part of the script before the first occurrence of <cite>script_begin_tag</cite></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
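+<p>For example, a sketch loading a templated partition (table names are
+placeholders, and <code class="docutils literal"><span class="pre">dag</span></code> is assumed to exist):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import HiveOperator
+
+load_partition = HiveOperator(
+    task_id='load_partition',
+    hql=(
+        "INSERT OVERWRITE TABLE mydb.target PARTITION (ds='{{ ds }}') "
+        "SELECT * FROM mydb.source WHERE ds='{{ ds }}'"),  # hypothetical tables
+    dag=dag)
+</pre></div>
+</div>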
+<dl class="class">
+<dt id="airflow.operators.HivePartitionSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">HivePartitionSensor</code><span class="sig-paren">(</span><em>table</em>, <em>partition=&quot;ds='{{ ds }}'&quot;</em>, <em>metastore_conn_id='metastore_default'</em>, <em>schema='default'</em>, <em>poke_interval=180</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#HivePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.HivePartitionSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Waits for a partition to show up in Hive</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>table</strong> (<em>string</em>) &#8211; The name of the table to wait for, supports the dot
+notation (my_database.my_table)</li>
+<li><strong>partition</strong> (<em>string</em>) &#8211; The partition clause to wait for. This is passed as
+is to the Metastore Thrift client &#8220;get_partitions_by_filter&#8221; method,
+and apparently supports SQL-like notation as in <cite>ds=&#8216;2015-01-01&#8217;
+AND type=&#8217;value&#8217;</cite> and &gt; &lt; signs as in &#8220;ds&gt;=2015-01-01&#8221;</li>
+<li><strong>metastore_conn_id</strong> (<em>str</em>) &#8211; reference to the metastore thrift service
+connection id</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.HiveToDruidTransfer">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">HiveToDruidTransfer</code><span class="sig-paren">(</span><em>sql</em>, <em>druid_datasource</em>, <em>ts_dim</em>, <em>metric_spec=None</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>druid_ingest_conn_id='druid_ingest_default'</em>, <em>metastore_conn_id='metastore_default'</em>, <em>hadoop_dependency_coordinates=None</em>, <em>intervals=None</em>, <em>num_shards=-1</em>, <em>target_partition_size=-1</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/hive_to_druid.html#HiveToDruidTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.HiveToDruidTransfer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Moves data from Hive to Druid. Note that for now the data is loaded
+into memory before being pushed to Druid, so this operator should
+be used for smallish amounts of data.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>str</em>) &#8211; SQL query to execute against the Hive database</li>
+<li><strong>druid_datasource</strong> (<em>str</em>) &#8211; the druid datasource you want to ingest into</li>
+<li><strong>ts_dim</strong> (<em>str</em>) &#8211; the timestamp dimension</li>
+<li><strong>metric_spec</strong> (<em>list</em>) &#8211; the metrics you want to define for your data</li>
+<li><strong>hive_cli_conn_id</strong> (<em>str</em>) &#8211; the hive connection id</li>
+<li><strong>druid_ingest_conn_id</strong> (<em>str</em>) &#8211; the druid ingest connection id</li>
+<li><strong>metastore_conn_id</strong> (<em>str</em>) &#8211; the metastore connection id</li>
+<li><strong>hadoop_dependency_coordinates</strong> (<em>list of str</em>) &#8211; list of coordinates to squeeze
+into the ingest json</li>
+<li><strong>intervals</strong> (<em>list</em>) &#8211; list of time intervals that defines segments, this
+is passed as is to the json object</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.HiveToMySqlTransfer">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">HiveToMySqlTransfer</code><span class="sig-paren">(</span><em>sql</em>, <em>mysql_table</em>, <em>hiveserver2_conn_id='hiveserver2_default'</em>, <em>mysql_conn_id='mysql_default'</em>, <em>mysql_preoperator=None</em>, <em>mysql_postoperator=None</em>, <em>bulk_load=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/hive_to_mysql.html#HiveToMySqlTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.HiveToMySqlTransfer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Moves data from Hive to MySQL. Note that for now the data is loaded
+into memory before being pushed to MySQL, so this operator should
+be used for smallish amounts of data.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>str</em>) &#8211; SQL query to execute against the Hive database</li>
+<li><strong>mysql_table</strong> (<em>str</em>) &#8211; target MySQL table, use dot notation to target a
+specific database</li>
+<li><strong>mysql_conn_id</strong> (<em>str</em>) &#8211; destination mysql connection</li>
+<li><strong>hiveserver2_conn_id</strong> (<em>str</em>) &#8211; source hive connection</li>
+<li><strong>mysql_preoperator</strong> (<em>str</em>) &#8211; sql statement to run against mysql prior to
+import, typically used to truncate or delete in place of the data
+coming in, allowing the task to be idempotent (running the task
+twice won&#8217;t double load data)</li>
+<li><strong>mysql_postoperator</strong> (<em>str</em>) &#8211; sql statement to run against mysql after the
+import, typically used to move data from staging to production
+and issue cleanup commands.</li>
+<li><strong>bulk_load</strong> (<em>bool</em>) &#8211; flag to use bulk_load option.  This loads mysql directly
+from a tab-delimited text file using the LOAD DATA LOCAL INFILE command.
+This option requires an extra connection parameter for the
+destination MySQL connection: {&#8216;local_infile&#8217;: true}.</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
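+<p>A sketch of the idempotent pattern described for
+<code class="docutils literal"><span class="pre">mysql_preoperator</span></code> (table names are placeholders):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import HiveToMySqlTransfer
+
+hive_to_mysql = HiveToMySqlTransfer(
+    task_id='hive_to_mysql',
+    sql="SELECT ds, metric FROM mydb.daily_agg WHERE ds = '{{ ds }}'",
+    mysql_table='analytics.daily_agg',  # hypothetical target table
+    # Delete the partition first so re-running the task won't double-load.
+    mysql_preoperator="DELETE FROM analytics.daily_agg WHERE ds = '{{ ds }}'",
+    dag=dag)
+</pre></div>
+</div>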
+<dl class="class">
+<dt id="airflow.operators.SimpleHttpOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">SimpleHttpOperator</code><span class="sig-paren">(</span><em>endpoint</em>, <em>method='POST'</em>, <em>data=None</em>, <em>headers=None</em>, <em>response_check=None</em>, <em>extra_options=None</em>, <em>http_conn_id='http_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/http_operator.html#SimpleHttpOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.SimpleHttpOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Calls an endpoint on an HTTP system to execute an action</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>http_conn_id</strong> (<em>string</em>) &#8211; The connection to run the sensor against</li>
+<li><strong>endpoint</strong> (<em>string</em>) &#8211; The relative part of the full url</li>
+<li><strong>method</strong> (<em>string</em>) &#8211; The HTTP method to use, default = &#8220;POST&#8221;</li>
+<li><strong>data</strong> (<em>For POST/PUT, depends on the content-type parameter,
+for GET a dictionary of key/value string pairs</em>) &#8211; The data to pass. POST-data in POST/PUT and params
+in the URL for a GET request.</li>
+<li><strong>headers</strong> (<em>a dictionary of string key/value pairs</em>) &#8211; The HTTP headers to be added to the GET request</li>
+<li><strong>response_check</strong> (<em>A lambda or defined function.</em>) &#8211; A check against the &#8216;requests&#8217; response object.
+Returns True for &#8216;pass&#8217; and False otherwise.</li>
+<li><strong>extra_options</strong> (<em>A dictionary of options, where key is string and value
+depends on the option that's being modified.</em>) &#8211; Extra options for the &#8216;requests&#8217; library, see the
+&#8216;requests&#8217; documentation (options to modify timeout, ssl, etc.)</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
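+<p>A sketch posting templated JSON and validating the response (the endpoint
+is hypothetical):</p>
+<div class="highlight-python"><div class="highlight"><pre>import json
+
+from airflow.operators import SimpleHttpOperator
+
+post_event = SimpleHttpOperator(
+    task_id='post_event',
+    http_conn_id='http_default',
+    endpoint='api/v1/events',  # hypothetical endpoint
+    data=json.dumps({'ds': '{{ ds }}'}),
+    headers={'Content-Type': 'application/json'},
+    # Fail the task unless the service answers 200.
+    response_check=lambda response: response.status_code == 200,
+    dag=dag)
+</pre></div>
+</div>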
+<dl class="class">
+<dt id="airflow.operators.HttpSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">HttpSensor</code><span class="sig-paren">(</span><em>endpoint</em>, <em>http_conn_id='http_default'</em>, <em>params=None</em>, <em>headers=None</em>, <em>response_check=None</em>, <em>extra_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#HttpSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.HttpSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<dl class="docutils">
+<dt>Executes a HTTP get statement and returns False on failure:</dt>
+<dd>404 not found or response_check function returned False</dd>
+</dl>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>http_conn_id</strong> (<em>string</em>) &#8211; The connection to run the sensor against</li>
+<li><strong>endpoint</strong> (<em>string</em>) &#8211; The relative part of the full url</li>
+<li><strong>params</strong> (<em>a dictionary of string key/value pairs</em>) &#8211; The parameters to be added to the GET url</li>
+<li><strong>headers</strong> (<em>a dictionary of string key/value pairs</em>) &#8211; The HTTP headers to be added to the GET request</li>
+<li><strong>response_check</strong> (<em>A lambda or defined function.</em>) &#8211; A check against the &#8216;requests&#8217; response object.
+Returns True for &#8216;pass&#8217; and False otherwise.</li>
+<li><strong>extra_options</strong> (<em>A dictionary of options, where key is string and value
+depends on the option that's being modified.</em>) &#8211; Extra options for the &#8216;requests&#8217; library, see the
+&#8216;requests&#8217; documentation (options to modify timeout, ssl, etc.)</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.MetastorePartitionSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">MetastorePartitionSensor</code><span class="sig-paren">(</span><em>table</em>, <em>partition_name</em>, <em>schema='default'</em>, <em>mysql_conn_id='metastore_mysql'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#MetastorePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.MetastorePartitionSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">sensors.SqlSensor</span></code></p>
+<p>An alternative to the HivePartitionSensor that talks directly to the
+MySQL db. This was created as a result of observing suboptimal
+queries generated by the Metastore thrift service when hitting
+subpartitioned tables. The Thrift service&#8217;s queries were written in a
+way that wouldn&#8217;t leverage the indexes.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>schema</strong> (<em>str</em>) &#8211; the schema</li>
+<li><strong>table</strong> (<em>str</em>) &#8211; the table</li>
+<li><strong>partition_name</strong> (<em>str</em>) &#8211; the partition name, as defined in the PARTITIONS
+table of the Metastore. Order of the fields does matter.
+Examples: <code class="docutils literal"><span class="pre">ds=2016-01-01</span></code> or
+<code class="docutils literal"><span class="pre">ds=2016-01-01/sub=foo</span></code> for a sub partitioned table</li>
+<li><strong>mysql_conn_id</strong> (<em>str</em>) &#8211; a reference to the MySQL conn_id for the metastore</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.MsSqlOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">MsSqlOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>mssql_conn_id='mssql_default'</em>, <em>parameters=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mssql_operator.html#MsSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.MsSqlOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Executes sql code in a specific Microsoft SQL database</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>mssql_conn_id</strong> (<em>string</em>) &#8211; reference to a specific mssql database</li>
+<li><strong>sql</strong> (<em>string or string pointing to a template file. File must have
+a &#8216;.sql&#8217; extension.</em>) &#8211; the sql code to be executed</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.MsSqlToHiveTransfer">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">MsSqlToHiveTransfer</code><span class="sig-paren">(</span><em>sql</em>, <em>hive_table</em>, <em>create=True</em>, <em>recreate=False</em>, <em>partition=None</em>, <em>delimiter=u'x01'</em>, <em>mssql_conn_id='mssql_default'</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mssql_to_hive.html#MsSqlToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.MsSqlToHiveTransfer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Moves data from Microsoft SQL Server to Hive. The operator runs
+your query against Microsoft SQL Server, stores the file locally
+before loading it into a Hive table. If the <code class="docutils literal"><span class="pre">create</span></code> or
+<code class="docutils literal"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal"><span class="pre">True</span></code>,
+<code class="docutils literal"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
+Hive data types are inferred from the cursor&#8217;s metadata.
+Note that the table generated in Hive uses <code class="docutils literal"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code>
+which isn&#8217;t the most efficient serialization format. If a
+large amount of data is loaded and/or if the table gets
+queried considerably, you may want to use this operator only to
+stage the data into a temporary table before loading it into its
+final destination using a <code class="docutils literal"><span class="pre">HiveOperator</span></code>.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>str</em>) &#8211; SQL query to execute against the Microsoft SQL Server database</li>
+<li><strong>hive_table</strong> (<em>str</em>) &#8211; target Hive table, use dot notation to target a
+specific database</li>
+<li><strong>create</strong> (<em>bool</em>) &#8211; whether to create the table if it doesn&#8217;t exist</li>
+<li><strong>recreate</strong> (<em>bool</em>) &#8211; whether to drop and recreate the table at every execution</li>
+<li><strong>partition</strong> (<em>dict</em>) &#8211; target partition as a dict of partition columns and values</li>
+<li><strong>delimiter</strong> (<em>str</em>) &#8211; field delimiter in the file</li>
+<li><strong>mssql_conn_id</strong> (<em>str</em>) &#8211; source Microsoft SQL Server connection</li>
+<li><strong>hive_conn_id</strong> (<em>str</em>) &#8211; destination hive connection</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.MySqlOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">MySqlOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>mysql_conn_id='mysql_default'</em>, <em>parameters=None</em>, <em>autocommit=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mysql_operator.html#MySqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.MySqlOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Executes sql code in a specific MySQL database</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>mysql_conn_id</strong> (<em>string</em>) &#8211; reference to a specific mysql database</li>
+<li><strong>sql</strong> (<em>Can receive a str representing a sql statement,
+a list of str (sql statements), or a reference to a template file.
+Template references are recognized by str ending in '.sql'</em>) &#8211; the sql code to be executed</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
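+<p>A hedged sketch of the three accepted forms of <code class="docutils literal"><span class="pre">sql</span></code>
+(a <code class="docutils literal"><span class="pre">dag</span></code> object is assumed; the statements and file path are hypothetical):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import MySqlOperator
+
+# A single statement...
+t1 = MySqlOperator(task_id='t1', sql='TRUNCATE TABLE staging_events', dag=dag)
+
+# ...a list of statements...
+t2 = MySqlOperator(
+    task_id='t2',
+    sql=['DROP TABLE IF EXISTS tmp', 'CREATE TABLE tmp (id INT)'],
+    dag=dag)
+
+# ...or a path to a templated file ending in '.sql'
+t3 = MySqlOperator(task_id='t3', sql='queries/load_events.sql', dag=dag)
+</pre></div></div>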
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.MySqlToHiveTransfer">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">MySqlToHiveTransfer</code><span class="sig-paren">(</span><em>sql</em>, <em>hive_table</em>, <em>create=True</em>, <em>recreate=False</em>, <em>partition=None</em>, <em>delimiter=u'x01'</em>, <em>mysql_conn_id='mysql_default'</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mysql_to_hive.html#MySqlToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.MySqlToHiveTransfer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Moves data from MySQL to Hive. The operator runs your query against
+MySQL and stores the file locally before loading it into a Hive table.
+If the <code class="docutils literal"><span class="pre">create</span></code> or <code class="docutils literal"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal"><span class="pre">True</span></code>,
+<code class="docutils literal"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
+Hive data types are inferred from the cursor&#8217;s metadata. Note that the
+table generated in Hive uses <code class="docutils literal"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code>
+which isn&#8217;t the most efficient serialization format. If a
+large amount of data is loaded and/or if the table gets
+queried considerably, you may want to use this operator only to
+stage the data into a temporary table before loading it into its
+final destination using a <code class="docutils literal"><span class="pre">HiveOperator</span></code>.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>str</em>) &#8211; SQL query to execute against the MySQL database</li>
+<li><strong>hive_table</strong> (<em>str</em>) &#8211; target Hive table, use dot notation to target a
+specific database</li>
+<li><strong>create</strong> (<em>bool</em>) &#8211; whether to create the table if it doesn&#8217;t exist</li>
+<li><strong>recreate</strong> (<em>bool</em>) &#8211; whether to drop and recreate the table at every
+execution</li>
+<li><strong>partition</strong> (<em>dict</em>) &#8211; target partition as a dict of partition columns
+and values</li>
+<li><strong>delimiter</strong> (<em>str</em>) &#8211; field delimiter in the file</li>
+<li><strong>mysql_conn_id</strong> (<em>str</em>) &#8211; source mysql connection</li>
+<li><strong>hive_cli_conn_id</strong> (<em>str</em>) &#8211; destination hive connection</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
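+<p>A sketch of the staging pattern suggested above; all names, and the
+<code class="docutils literal"><span class="pre">dag</span></code> object, are illustrative assumptions:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import MySqlToHiveTransfer
+
+# Land MySQL rows in a temporary textfile-backed Hive table; a
+# downstream HiveOperator can then rewrite it into an efficient format.
+stage = MySqlToHiveTransfer(
+    task_id='stage_orders',
+    sql="SELECT * FROM orders WHERE ds = '{{ ds }}'",
+    hive_table='tmp.orders_staging',
+    recreate=True,
+    mysql_conn_id='mysql_default',
+    hive_cli_conn_id='hive_cli_default',
+    dag=dag)
+</pre></div></div>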
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.PostgresOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">PostgresOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>postgres_conn_id='postgres_default'</em>, <em>autocommit=False</em>, <em>parameters=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/postgres_operator.html#PostgresOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.PostgresOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Executes sql code in a specific Postgres database</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>postgres_conn_id</strong> (<em>string</em>) &#8211; reference to a specific postgres database</li>
+<li><strong>sql</strong> (<em>Can receive a str representing a sql statement,
+a list of str (sql statements), or a reference to a template file.
+Template references are recognized by str ending in '.sql'</em>) &#8211; the sql code to be executed</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
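+<p>A minimal illustrative example; the DDL and the <code class="docutils literal"><span class="pre">dag</span></code> object are assumptions:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import PostgresOperator
+
+create_table = PostgresOperator(
+    task_id='create_events_table',
+    sql='CREATE TABLE IF NOT EXISTS events (id BIGINT, ds DATE)',
+    postgres_conn_id='postgres_default',
+    autocommit=True,
+    dag=dag)
+</pre></div></div>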
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.PrestoCheckOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">PrestoCheckOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>presto_conn_id='presto_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/presto_check_operator.html#PrestoCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.PrestoCheckOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">check_operator.CheckOperator</span></code></p>
+<p>Performs checks against Presto. The <code class="docutils literal"><span class="pre">PrestoCheckOperator</span></code> expects
+a sql query that will return a single row. Each value on that
+first row is evaluated using python <code class="docutils literal"><span class="pre">bool</span></code> casting. If any of the
+values return <code class="docutils literal"><span class="pre">False</span></code> the check is failed and errors out.</p>
+<p>Note that Python bool casting evals the following as <code class="docutils literal"><span class="pre">False</span></code>:
+* False
+* 0
+* Empty string (<code class="docutils literal"><span class="pre">&quot;&quot;</span></code>)
+* Empty list (<code class="docutils literal"><span class="pre">[]</span></code>)
+* Empty dictionary or set (<code class="docutils literal"><span class="pre">{}</span></code>)</p>
+<p>Given a query like <code class="docutils literal"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if
+the count <code class="docutils literal"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft much more complex query that could,
+for instance, check that the table has the same number of rows as
+the source table upstream, or that the count of today&#8217;s partition is
+greater than yesterday&#8217;s partition, or that a set of metrics are less
+than 3 standard deviation for the 7 day average.</p>
+<p>This operator can be used as a data quality check in your pipeline, and
+depending on where you put it in your DAG, you have the choice to
+stop the critical path, preventing from
+publishing dubious data, or on the side and receive email alterts
+without stopping the progress of the DAG.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>string</em>) &#8211; the sql to be executed</li>
+<li><strong>presto_conn_id</strong> (<em>string</em>) &#8211; reference to the Presto database</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
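+<p>A hedged example of the row-count check discussed above; the table name
+and <code class="docutils literal"><span class="pre">dag</span></code> object are assumed:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import PrestoCheckOperator
+
+# Fails the task if foo has no rows for the execution date,
+# since bool(0) evaluates to False.
+check_foo = PrestoCheckOperator(
+    task_id='check_foo_not_empty',
+    sql="SELECT COUNT(*) FROM foo WHERE ds = '{{ ds }}'",
+    presto_conn_id='presto_default',
+    dag=dag)
+</pre></div></div>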
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.PrestoIntervalCheckOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">PrestoIntervalCheckOperator</code><span class="sig-paren">(</span><em>table</em>, <em>metrics_thresholds</em>, <em>date_filter_column='ds'</em>, <em>days_back=-7</em>, <em>presto_conn_id='presto_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/presto_check_operator.html#PrestoIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.PrestoIntervalCheckOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">check_operator.IntervalCheckOperator</span></code></p>
+<p>Checks that the values of metrics given as SQL expressions are within
+a certain tolerance of the ones from days_back before.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>table</strong> (<em>str</em>) &#8211; the table name</li>
+<li><strong>days_back</strong> (<em>int</em>) &#8211; number of days between ds and the ds we want to check
+against. Defaults to 7 days back (-7)</li>
+<li><strong>metrics_thresholds</strong> (<em>dict</em>) &#8211; a dictionary of ratios indexed by metrics</li>
+<li><strong>presto_conn_id</strong> (<em>string</em>) &#8211; reference to the Presto database</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
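+<p>An illustrative sketch, assuming a hypothetical <code class="docutils literal"><span class="pre">events</span></code> table
+partitioned by <code class="docutils literal"><span class="pre">ds</span></code> and an existing <code class="docutils literal"><span class="pre">dag</span></code> object:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import PrestoIntervalCheckOperator
+
+# Fail if today's row count differs from 7 days ago by more than 50%.
+volume_check = PrestoIntervalCheckOperator(
+    task_id='check_event_volume',
+    table='events',
+    metrics_thresholds={'COUNT(*)': 1.5},
+    date_filter_column='ds',
+    days_back=-7,
+    presto_conn_id='presto_default',
+    dag=dag)
+</pre></div></div>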
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.PrestoValueCheckOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">PrestoValueCheckOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>pass_value</em>, <em>tolerance=None</em>, <em>presto_conn_id='presto_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/presto_check_operator.html#PrestoValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.PrestoValueCheckOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">check_operator.ValueCheckOperator</span></code></p>
+<p>Performs a simple value check using sql code.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>sql</strong> (<em>string</em>) &#8211; the sql to be executed</li>
+<li><strong>presto_conn_id</strong> (<em>string</em>) &#8211; reference to the Presto database</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
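+<p>A short sketch with placeholder names (a <code class="docutils literal"><span class="pre">dag</span></code> object is assumed):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import PrestoValueCheckOperator
+
+# Fail unless the count is within 10% of the expected 100 rows.
+value_check = PrestoValueCheckOperator(
+    task_id='check_row_count',
+    sql='SELECT COUNT(*) FROM foo',
+    pass_value=100,
+    tolerance=0.1,
+    presto_conn_id='presto_default',
+    dag=dag)
+</pre></div></div>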
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.PythonOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">PythonOperator</code><span class="sig-paren">(</span><em>python_callable</em>, <em>op_args=None</em>, <em>op_kwargs=None</em>, <em>provide_context=False</em>, <em>templates_dict=None</em>, <em>templates_exts=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/python_operator.html#PythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.PythonOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Executes a Python callable</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>python_callable</strong> (<em>python callable</em>) &#8211; A reference to an object that is callable</li>
+<li><strong>op_kwargs</strong> (<em>dict</em>) &#8211; a dictionary of keyword arguments that will get unpacked
+in your function</li>
+<li><strong>op_args</strong> (<em>list</em>) &#8211; a list of positional arguments that will get unpacked when
+calling your callable</li>
+<li><strong>provide_context</strong> (<em>bool</em>) &#8211; if set to true, Airflow will pass a set of
+keyword arguments that can be used in your function. This set of
+kwargs corresponds exactly to what you can use in your jinja
+templates. For this to work, you need to define <cite>**kwargs</cite> in your
+function header.</li>
+<li><strong>templates_dict</strong> (<em>dict of str</em>) &#8211; a dictionary where the values are templates that
+will get templated by the Airflow engine sometime between
+<code class="docutils literal"><span class="pre">__init__</span></code> and <code class="docutils literal"><span class="pre">execute</span></code> and are made available
+in your callable&#8217;s context after the template has been applied</li>
+<li><strong>templates_exts</strong> &#8211; a list of file extensions to resolve while
+processing templated fields, for example <code class="docutils literal"><span class="pre">['.sql',</span> <span class="pre">'.hql']</span></code></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
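+<p>A minimal sketch of <code class="docutils literal"><span class="pre">provide_context</span></code> in action
+(assumes a <code class="docutils literal"><span class="pre">dag</span></code> object):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import PythonOperator
+
+def print_context(ds, **kwargs):
+    # 'ds' and the other context kwargs mirror what is
+    # available in jinja templates.
+    print(ds)
+
+print_date = PythonOperator(
+    task_id='print_the_execution_date',
+    python_callable=print_context,
+    provide_context=True,
+    dag=dag)
+</pre></div></div>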
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.S3KeySensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">S3KeySensor</code><span class="sig-paren">(</span><em>bucket_key</em>, <em>bucket_name=None</em>, <em>wildcard_match=False</em>, <em>s3_conn_id='s3_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#S3KeySensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.S3KeySensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Waits for a key (a file-like instance on S3) to be present in a S3 bucket.
+S3 being a key/value store, it does not support folders. The path is just
+a key to a resource.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>bucket_key</strong> (<em>str</em>) &#8211; The key being waited on. Supports full s3:// style url
+or relative path from root level.</li>
+<li><strong>bucket_name</strong> (<em>str</em>) &#8211; Name of the S3 bucket</li>
+<li><strong>wildcard_match</strong> (<em>bool</em>) &#8211; whether the bucket_key should be interpreted as a
+Unix wildcard pattern</li>
+<li><strong>s3_conn_id</strong> (<em>str</em>) &#8211; a reference to the s3 connection</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
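+<p>An illustrative sketch; the bucket and key are hypothetical:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import S3KeySensor
+
+# Block downstream tasks until a matching key lands in S3.
+wait_for_data = S3KeySensor(
+    task_id='wait_for_data',
+    bucket_key='s3://my-bucket/data/{{ ds }}/part-*',
+    wildcard_match=True,
+    s3_conn_id='s3_default',
+    dag=dag)
+</pre></div></div>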
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.S3ToHiveTransfer">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">S3ToHiveTransfer</code><span class="sig-paren">(</span><em>s3_key</em>, <em>field_dict</em>, <em>hive_table</em>, <em>delimiter='</em>, <em>'</em>, <em>create=True</em>, <em>recreate=False</em>, <em>partition=None</em>, <em>headers=False</em>, <em>check_headers=False</em>, <em>wildcard_match=False</em>, <em>s3_conn_id='s3_default'</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/s3_to_hive_operator.html#S3ToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.S3ToHiveTransfer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Moves data from S3 to Hive. The operator downloads a file from S3,
+stores the file locally before loading it into a Hive table.
+If the <code class="docutils literal"><span class="pre">create</span></code> or <code class="docutils literal"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal"><span class="pre">True</span></code>,
+a <code class="docutils literal"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
+Hive data types are inferred from the cursor&#8217;s metadata from.</p>
+<p>Note that the table generated in Hive uses <code class="docutils literal"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code>
+which isn&#8217;t the most efficient serialization format. If a
+large amount of data is loaded and/or if the table gets
+queried considerably, you may want to use this operator only to
+stage the data into a temporary table before loading it into its
+final destination using a <code class="docutils literal"><span class="pre">HiveOperator</span></code>.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>s3_key</strong> (<em>str</em>) &#8211; The key to be retrieved from S3</li>
+<li><strong>field_dict</strong> (<em>dict</em>) &#8211; A dictionary of the field names in the file
+as keys and their Hive types as values</li>
+<li><strong>hive_table</strong> (<em>str</em>) &#8211; target Hive table, use dot notation to target a
+specific database</li>
+<li><strong>create</strong> (<em>bool</em>) &#8211; whether to create the table if it doesn&#8217;t exist</li>
+<li><strong>recreate</strong> (<em>bool</em>) &#8211; whether to drop and recreate the table at every
+execution</li>
+<li><strong>partition</strong> (<em>dict</em>) &#8211; target partition as a dict of partition columns
+and values</li>
+<li><strong>headers</strong> (<em>bool</em>) &#8211; whether the file contains column names on the first
+line</li>
+<li><strong>check_headers</strong> (<em>bool</em>) &#8211; whether the column names on the first line should be
+checked against the keys of field_dict</li>
+<li><strong>wildcard_match</strong> (<em>bool</em>) &#8211; whether the s3_key should be interpreted as a Unix
+wildcard pattern</li>
+<li><strong>delimiter</strong> (<em>str</em>) &#8211; field delimiter in the file</li>
+<li><strong>s3_conn_id</strong> (<em>str</em>) &#8211; source s3 connection</li>
+<li><strong>hive_cli_conn_id</strong> (<em>str</em>) &#8211; destination hive connection</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
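+<p>A hedged sketch with placeholder names (the <code class="docutils literal"><span class="pre">dag</span></code> object is assumed):</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import S3ToHiveTransfer
+
+s3_to_hive = S3ToHiveTransfer(
+    task_id='s3_to_hive',
+    s3_key='s3://my-bucket/events/{{ ds }}.csv',
+    field_dict={'id': 'BIGINT', 'name': 'STRING'},
+    hive_table='staging.events',
+    partition={'ds': '{{ ds }}'},
+    delimiter=',',
+    headers=True,
+    check_headers=True,
+    s3_conn_id='s3_default',
+    hive_cli_conn_id='hive_cli_default',
+    dag=dag)
+</pre></div></div>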
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.ShortCircuitOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">ShortCircuitOperator</code><span class="sig-paren">(</span><em>python_callable</em>, <em>op_args=None</em>, <em>op_kwargs=None</em>, <em>provide_context=False</em>, <em>templates_dict=None</em>, <em>templates_exts=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/python_operator.html#ShortCircuitOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.ShortCircuitOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">python_operator.PythonOperator</span></code></p>
+<p>Allows a workflow to continue only if a condition is met. Otherwise, the
+workflow &#8220;short-circuits&#8221; and downstream tasks are skipped.</p>
+<p>The ShortCircuitOperator is derived from the PythonOperator. It evaluates a
+condition and short-circuits the workflow if the condition is False. Any
+downstream tasks are marked with a state of &#8220;skipped&#8221;. If the condition is
+True, downstream tasks proceed as normal.</p>
+<p>The condition is determined by the result of <cite>python_callable</cite>.</p>
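+<p>A minimal sketch; the <code class="docutils literal"><span class="pre">dag</span></code> object and the downstream
+<code class="docutils literal"><span class="pre">weekday_task</span></code> are assumed to exist:</p>
+<div class="highlight-python"><div class="highlight"><pre>from datetime import datetime
+from airflow.operators import ShortCircuitOperator
+
+# Downstream tasks are skipped on weekends.
+is_weekday = ShortCircuitOperator(
+    task_id='is_weekday',
+    python_callable=lambda: datetime.now().weekday() &lt; 5,
+    dag=dag)
+is_weekday.set_downstream(weekday_task)
+</pre></div></div>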
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.SlackAPIOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">SlackAPIOperator</code><span class="sig-paren">(</span><em>token='unset'</em>, <em>method='unset'</em>, <em>api_params=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/slack_operator.html#SlackAPIOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.SlackAPIOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
+<p>Base Slack Operator.
+The SlackAPIPostOperator is derived from this operator.
+In the future, additional Slack API Operators will be derived from this class as well.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>token</strong> (<em>string</em>) &#8211; Slack API token (<a class="reference external" href="https://api.slack.com/web">https://api.slack.com/web</a>)</li>
+<li><strong>method</strong> (<em>string</em>) &#8211; The Slack API Method to Call (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</li>
+<li><strong>api_params</strong> (<em>dict</em>) &#8211; API Method call parameters (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+<dl class="method">
+<dt id="airflow.operators.SlackAPIOperator.construct_api_call_params">
+<code class="descname">construct_api_call_params</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/slack_operator.html#SlackAPIOperator.construct_api_call_params"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.SlackAPIOperator.construct_api_call_params" title="Permalink to this definition">¶</a></dt>
+<dd><p>Used by the execute function. Allows templating on the source fields of the api_call_params dict before construction</p>
+<p>Override in child classes.
+Each SlackAPIOperator child class is responsible for having a construct_api_call_params function
+which sets self.api_call_params with a dict of API call parameters (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</p>
+</dd></dl>
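+<p>A hedged sketch of the subclassing contract described above; the child
+class, its method name, and its parameters are purely illustrative:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import SlackAPIOperator
+
+class SlackAPIChannelsInfoOperator(SlackAPIOperator):
+    # Hypothetical child class: it is responsible for setting the
+    # API call parameters before execute runs.
+    def __init__(self, channel='#general', *args, **kwargs):
+        self.channel = channel
+        kwargs['method'] = 'channels.info'
+        super(SlackAPIChannelsInfoOperator, self).__init__(*args, **kwargs)
+
+    def construct_api_call_params(self):
+        self.api_params = {'channel': self.channel}
+</pre></div></div>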
+
+<dl class="method">
+<dt id="airflow.operators.SlackAPIOperator.execute">
+<code class="descname">execute</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/slack_operator.html#SlackAPIOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.SlackAPIOperator.execute" title="Permalink to this definition">¶</a></dt>
+<dd><p>SlackAPIOperator calls will not fail even if the call is unsuccessful.
+They should not prevent a DAG from completing successfully.</p>
+</dd></dl>
+
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.SlackAPIPostOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">SlackAPIPostOperator</code><span class="sig-paren">(</span><em>channel='#general'</em>, <em>username='Airflow'</em>, <em>text='No message has been set.nHere is a cat video insteadnhttps://www.youtube.com/watch?v=J---aiyznGQ'</em>, <em>icon_url='https://raw.githubusercontent.com/airbnb/airflow/master/airflow/www/static/pin_100.png'</em>, <em>attachments=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/slack_operator.html#SlackAPIPostOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.SlackAPIPostOperator" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <code class="xref py py-class docutils literal"><span class="pre">slack_operator.SlackAPIOperator</span></code></p>
+<p>Posts messages to a slack channel</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>channel</strong> (<em>string</em>) &#8211; channel in which to post the message on slack, by name (#general) or ID (C12318391)</li>
+<li><strong>username</strong> (<em>string</em>) &#8211; Username that airflow will be posting to Slack as</li>
+<li><strong>text</strong> (<em>string</em>) &#8211; message to send to slack</li>
+<li><strong>icon_url</strong> (<em>string</em>) &#8211; url to icon used for this message</li>
+<li><strong>attachments</strong> (<em>array of hashes</em>) &#8211; extra formatting details - see <a class="reference external" href="https://api.slack.com/docs/attachments">https://api.slack.com/docs/attachments</a></li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
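+<p>An illustrative sketch; the token, channel, and <code class="docutils literal"><span class="pre">dag</span></code> object are placeholders:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import SlackAPIPostOperator
+
+notify = SlackAPIPostOperator(
+    task_id='notify_slack',
+    token='YOUR_SLACK_API_TOKEN',
+    channel='#data-pipeline',
+    username='Airflow',
+    text='Airflow pipeline finished.',
+    dag=dag)
+</pre></div></div>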
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.SqlSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">SqlSensor</code><span class="sig-paren">(</span><em>conn_id</em>, <em>sql</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#SqlSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.SqlSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Runs a sql statement until a criterion is met. It will keep trying while
+the sql returns no row, or while the first cell is in (0, &#8216;0&#8217;, &#8216;&#8217;).</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>conn_id</strong> (<em>string</em>) &#8211; The connection to run the sensor against</li>
+<li><strong>sql</strong> &#8211; The sql to run. To pass, it needs to return at least one cell
+that contains a non-zero / non-empty string value.</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
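+<p>A minimal sketch; the connection id, query, and <code class="docutils literal"><span class="pre">dag</span></code> object are illustrative:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import SqlSensor
+
+# Succeeds once the query returns a row whose first cell is truthy.
+wait_for_rows = SqlSensor(
+    task_id='wait_for_rows',
+    conn_id='mysql_default',
+    sql="SELECT COUNT(*) FROM events WHERE ds = '{{ ds }}'",
+    dag=dag)
+</pre></div></div>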
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.TimeSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">TimeSensor</code><span class="sig-paren">(</span><em>target_time</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#TimeSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.TimeSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Waits until the specified time of the day.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>target_time</strong> (<em>datetime.time</em>) &#8211; time after which the job succeeds</td>
+</tr>
+</tbody>
+</table>
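+<p>A minimal sketch (assumes a <code class="docutils literal"><span class="pre">dag</span></code> object):</p>
+<div class="highlight-python"><div class="highlight"><pre>from datetime import time
+from airflow.operators import TimeSensor
+
+# Holds downstream tasks until 18:00 on the day of the run.
+wait_until_evening = TimeSensor(
+    task_id='wait_until_evening',
+    target_time=time(18, 0),
+    dag=dag)
+</pre></div></div>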
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.WebHdfsSensor">
+<em class="property">class </em><code class="descclassname">airflow.operators.</code><code class="descname">WebHdfsSensor</code><span class="sig-paren">(</span><em>filepath</em>, <em>webhdfs_conn_id='webhdfs_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/sensors.html#WebHdfsSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.WebHdfsSensor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Bases: <a class="reference internal" href="#airflow.operators.sensors.BaseSensorOperator" title="airflow.operators.sensors.BaseSensorOperator"><code class="xref py py-class docutils literal"><span class="pre">sensors.BaseSensorOperator</span></code></a></p>
+<p>Waits for a file or folder to land in HDFS</p>
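+<p>A minimal sketch; the path and <code class="docutils literal"><span class="pre">dag</span></code> object are placeholders:</p>
+<div class="highlight-python"><div class="highlight"><pre>from airflow.operators import WebHdfsSensor
+
+wait_for_success_flag = WebHdfsSensor(
+    task_id='wait_for_success_flag',
+    filepath='/data/events/{{ ds }}/_SUCCESS',
+    webhdfs_conn_id='webhdfs_default',
+    dag=dag)
+</pre></div></div>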
+</dd></dl>
+
+<dl class="class">
+<dt id="airflow.operators.docker_operator.DockerOperator">
+<em class="property">class </em><code class="descclassname">airflow.operators.docker_operator.</code><code class="descname">DockerOperator</code><span class="sig-paren">(</span><em>image</em>, <em>api_version=None</em>, <em>command=None</em>, <em>cpus=1.0</em>, <em>docker_url='unix://var/run/docker.sock'</em>, <em>environment=None</em>, <em>force_pull=False</em>, <em>mem_limit=None</em>, <em>network_mode=None</em>, <em>tls_ca_cert=None</em>, <em>tls_client_cert=None</em>, <em>tls_client_key=None</em>, <em>tls_hostname=None</em>, <em>tls_ssl_version=None</em>, <em>tmp_dir='/tmp/airflow'</em>, <em>user=None</em>, <em>volumes=None</em>, <em>xcom_push=False</em>, <em>xcom_all=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/docker_operator.html#DockerOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.docker_operator.DockerOperator" title="Permal
 ink to this definition">¶</a></dt>
+<dd><p>Execute a command inside a docker container.</p>
+<p>A temporary directory is created on the host and mounted into a container to allow storing files
+that together exceed the default disk size of 10GB in a container. The path to the mounted
+directory can be accessed via the environment variable <code class="docutils literal"><span class="pre">AIRFLOW_TMP_DIR</span></code>.</p>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
+<li><strong>image</strong> (<em>str</em>) &#8211; Docker image from which to create the container.</li>

<TRUNCATED>

