beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From da...@apache.org
Subject beam-site git commit: Publish a blog about Apache Apex runner
Date Tue, 10 Jan 2017 01:43:47 GMT
Repository: beam-site
Updated Branches:
  refs/heads/asf-site 5de55f266 -> ba5fe2b2a


Publish a blog about Apache Apex runner


Project: http://git-wip-us.apache.org/repos/asf/beam-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam-site/commit/ba5fe2b2
Tree: http://git-wip-us.apache.org/repos/asf/beam-site/tree/ba5fe2b2
Diff: http://git-wip-us.apache.org/repos/asf/beam-site/diff/ba5fe2b2

Branch: refs/heads/asf-site
Commit: ba5fe2b2a1827fed3290c0418c7af873d64b975b
Parents: 5de55f2
Author: Davor Bonaci <davor@google.com>
Authored: Mon Jan 9 17:43:02 2017 -0800
Committer: Davor Bonaci <davor@google.com>
Committed: Mon Jan 9 17:43:02 2017 -0800

----------------------------------------------------------------------
 content/blog/2016/01/08/added-apex-runner.html | 211 --------------------
 content/blog/2016/01/09/added-apex-runner.html | 211 ++++++++++++++++++++
 content/blog/index.html                        |   6 +-
 content/index.html                             |   2 +-
 src/_posts/2017-01-09-added-apex-runner.md     |   2 +-
 5 files changed, 216 insertions(+), 216 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam-site/blob/ba5fe2b2/content/blog/2016/01/08/added-apex-runner.html
----------------------------------------------------------------------
diff --git a/content/blog/2016/01/08/added-apex-runner.html b/content/blog/2016/01/08/added-apex-runner.html
deleted file mode 100644
index abb22bb..0000000
--- a/content/blog/2016/01/08/added-apex-runner.html
+++ /dev/null
@@ -1,211 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-
-  <head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-  <meta name="viewport" content="width=device-width, initial-scale=1">
-
-  <title>Release 0.4.0 adds a runner for Apache Apex</title>
-  <meta name="description" content="The latest release 0.4.0 of Apache Beam adds a new
runner for Apache Apex. We are excited to reach this initial milestone and are looking forward
to continue...">
-
-  <link rel="stylesheet" href="/styles/site.css">
-  <link rel="stylesheet" href="/css/theme.css">
-  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
-  <script src="/js/bootstrap.min.js"></script>
-  <script src="/js/language-switch.js"></script>
-  <link rel="canonical" href="http://beam.apache.org/blog/2016/01/08/added-apex-runner.html"
data-proofer-ignore>
-  <link rel="alternate" type="application/rss+xml" title="Apache Beam" href="http://beam.apache.org/feed.xml">
-  <script>
-    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
-    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
-    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
-    })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-
-    ga('create', 'UA-73650088-1', 'auto');
-    ga('send', 'pageview');
-
-  </script>
-  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
-</head>
-
-
-  <body role="document">
-
-    <nav class="navbar navbar-default navbar-fixed-top">
-  <div class="container">
-    <div class="navbar-header">
-      <a href="/" class="navbar-brand" >
-        <img alt="Brand" style="height: 25px" src="/images/beam_logo_navbar.png">
-      </a>
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"
aria-expanded="false" aria-controls="navbar">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-    </div>
-    <div id="navbar" class="navbar-collapse collapse">
-      <ul class="nav navbar-nav">
-        <li class="dropdown">
-		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Get Started <span class="caret"></span></a>
-		  <ul class="dropdown-menu">
-			  <li><a href="/get-started/beam-overview/">Beam Overview</a></li>
-              <li><a href="/get-started/quickstart/">Quickstart</a></li>
-			  <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Example Walkthroughs</li>
-			  <li><a href="/get-started/wordcount-example/">WordCount</a></li>
-			  <li><a href="/get-started/mobile-gaming-example/">Mobile Gaming</a></li>
-              <li role="separator" class="divider"></li>
-              <li class="dropdown-header">Resources</li>
-              <li><a href="/get-started/downloads">Downloads</a></li>
-              <li><a href="/get-started/support">Support</a></li>
-		  </ul>
-	    </li>
-        <li class="dropdown">
-		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Documentation <span class="caret"></span></a>
-		  <ul class="dropdown-menu">
-			  <li><a href="/documentation">Using the Documentation</a></li>
-			  <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Beam Concepts</li>
-			  <li><a href="/documentation/programming-guide/">Programming Guide</a></li>
-			  <li><a href="/documentation/resources/">Additional Resources</a></li>
-			  <li role="separator" class="divider"></li>
-              <li class="dropdown-header">Pipeline Fundamentals</li>
-              <li><a href="/documentation/pipelines/design-your-pipeline/">Design
Your Pipeline</a></li>
-              <li><a href="/documentation/pipelines/create-your-pipeline/">Create
Your Pipeline</a></li>
-              <li><a href="/documentation/pipelines/test-your-pipeline/">Test
Your Pipeline</a></li>
-              <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">SDKs</li>
-			  <li><a href="/documentation/sdks/java/">Java SDK</a></li>
-			  <li><a href="/documentation/sdks/javadoc/0.4.0/" target="_blank">Java SDK
API Reference <img src="/images/external-link-icon.png"
-                 width="14" height="14"
-                 alt="External link."></a>
-        </li>
-        <li><a href="/documentation/sdks/python/">Python SDK</a></li>
-			  <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Runners</li>
-			  <li><a href="/documentation/runners/capability-matrix/">Capability Matrix</a></li>
-			  <li><a href="/documentation/runners/direct/">Direct Runner</a></li>
-			  <li><a href="/documentation/runners/apex/">Apache Apex Runner</a></li>
-			  <li><a href="/documentation/runners/flink/">Apache Flink Runner</a></li>
-			  <li><a href="/documentation/runners/spark/">Apache Spark Runner</a></li>
-			  <li><a href="/documentation/runners/dataflow/">Cloud Dataflow Runner</a></li>
-		  </ul>
-	    </li>
-        <li class="dropdown">
-		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Contribute <span class="caret"></span></a>
-		  <ul class="dropdown-menu">
-			  <li><a href="/contribute">Get Started Contributing</a></li>
-        <li role="separator" class="divider"></li>
-        <li class="dropdown-header">Guides</li>
-			  <li><a href="/contribute/contribution-guide/">Contribution Guide</a></li>
-        <li><a href="/contribute/testing/">Testing Guide</a></li>
-        <li><a href="/contribute/release-guide/">Release Guide</a></li>
-        <li role="separator" class="divider"></li>
-        <li class="dropdown-header">Technical References</li>
-        <li><a href="/contribute/design-principles/">Design Principles</a></li>
-			  <li><a href="/contribute/work-in-progress/">Ongoing Projects</a></li>
-        <li><a href="/contribute/source-repository/">Source Repository</a></li>
     
-        <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Promotion</li>
-        <li><a href="/contribute/presentation-materials/">Presentation Materials</a></li>
-        <li><a href="/contribute/logos/">Logos and Design</a></li>
-        <li role="separator" class="divider"></li>
-        <li><a href="/contribute/maturity-model/">Maturity Model</a></li>
-        <li><a href="/contribute/team/">Team</a></li>
-		  </ul>
-	    </li>
-
-        <li><a href="/blog">Blog</a></li>
-      </ul>
-      <ul class="nav navbar-nav navbar-right">
-        <li class="dropdown">
-          <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false"><img src="https://www.apache.org/foundation/press/kit/feather_small.png"
alt="Apache Logo" style="height:24px;">Apache Software Foundation<span class="caret"></span></a>
-          <ul class="dropdown-menu dropdown-menu-right">
-            <li><a href="http://www.apache.org/">ASF Homepage</a></li>
-            <li><a href="http://www.apache.org/licenses/">License</a></li>
-            <li><a href="http://www.apache.org/security/">Security</a></li>
-            <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
-            <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
-            <li><a href="https://www.apache.org/foundation/policies/conduct">Code
of Conduct</a></li>
-          </ul>
-        </li>
-      </ul>
-    </div><!--/.nav-collapse -->
-  </div>
-</nav>
-
-
-<link rel="stylesheet" href="">
-
-
-    <div class="container" role="main">
-
-      <div class="row">
-        
-
-<article class="post" itemscope itemtype="http://schema.org/BlogPosting">
-
-  <header class="post-header">
-    <h1 class="post-title" itemprop="name headline">Release 0.4.0 adds a runner for
Apache Apex</h1>
-    <p class="post-meta"><time datetime="2016-01-08T23:00:01-08:00" itemprop="datePublished">Jan
8, 2016</time> •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
-</p>
-  </header>
-
-  <div class="post-content" itemprop="articleBody">
-    <p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a>
adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are
excited to reach this initial milestone and are looking forward to continued collaboration
between the Beam and Apex communities to advance the runner.</p>
-
-<!--more-->
-
-<p>Beam evolved from the Google Dataflow SDK and as incubator project has quickly adapted
the Apache way, grown the community and attracts increasing interest from users that hope
to benefit from a conceptual strong unified programming model that is portable between different
big data processing frameworks (see <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101">Streaming-101</a>
and <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102">Streaming-102</a>).
Multiple Apache projects already provide runners for Beam (see <a href="http://beam.apache.org/documentation/runners/capability-matrix/">runners
and capabilities matrix</a>).</p>
-
-<p>Apex is a stream processing framework for low-latency, high-throughput, stateful
and reliable processing of complex analytics pipelines on clusters. Apex was developed since
2012 and is used in production by large companies for real-time and batch processing at scale.</p>
-
-<p>The initial revision of the runner was focussed on broad coverage of the Beam model
on a functional level. That means, there will be follow up work in several areas to take the
runner from functional to scalable and high performance to match the capabilities of Apex
and its native API. The runner capabilities matrix shows that the Apex capabilities are well
aligned with the Beam model. Specifically, the ability to track computational state in a fault
tolerant and efficient manner is needed to broadly support the windowing concepts, including
event time based processing.</p>
-
-<h2 id="stateful-stream-processor">Stateful Stream Processor</h2>
-
-<p>Apex was built as stateful stream processor from the ground up. Operators <a
href="https://www.datatorrent.com/blog/blog-introduction-to-checkpoint/">checkpoint</a>
state in a distributed and asynchronous manner that produces a consistent snapshot for the
entire processing graph, which can be used for recovery. Apex also supports such recovery
in an incremental, or fine grained, manner. This means only the portion of the DAG that is
actually affected by a failure will be recovered while the remaining pipeline continues processing
(this can be leveraged to implement use cases with special needs, such as speculative execution
to achieve SLA on the processing latency). The state checkpointing along with idempotent processing
guarantee is the basis for <a href="https://www.datatorrent.com/blog/end-to-end-exactly-once-with-apache-apex/">exactly-once
results</a> support in Apex.</p>
-
-<h2 id="translation-to-apex-dag">Translation to Apex DAG</h2>
-
-<p>A Beam runner needs to implement the translation from the Beam model to the underlying
frameworks execution model. In the case of Apex, the runner will translate the pipeline into
the <a href="https://www.datatorrent.com/blog/tracing-dags-from-specification-to-execution/">native
(compositional, low level) DAG API</a> (which is also the base for a number of other
API that are available to specify applications that run on Apex). The DAG consists of operators
(functional building blocks that are connected with streams. The runner provides the execution
layer. In the case of Apex it is distributed stream processing, operators process data event
by event. The minimum set of operators covers Beam’s primitive transforms: <code class="highlighter-rouge">ParDo.Bound</code>,
 <code class="highlighter-rouge">ParDo.BoundMulti</code>, <code class="highlighter-rouge">Read.Unbounded</code>,
<code class="highlighter-rouge">Read.Bounded</code>, <code class="highlighter-rouge">GroupByKey</code>,
  <code class="highlighter-rouge">Flatten.FlattenPCollectionList</code> etc.</p>
-
-<h2 id="execution-and-testing">Execution and Testing</h2>
-
-<p>In this release, the Apex runner executes the pipelines in embedded mode, where,
similar to the direct runner, everything is executed in a single JVM. See <a href="https://beam.apache.org/get-started/quickstart/">quickstart</a>
on how to run the Beam examples with the Apex runner.</p>
-
-<p>Embedded mode is useful for development and debugging. Apex in production runs distributed
on Apache Hadoop YARN clusters. An example how a Beam pipeline can be embedded into an Apex
application package to run on YARN can be found <a href="https://github.com/tweise/apex-samples/tree/master/beam-apex-wordcount">here</a>
and support for direct launch in the runner is currently being worked on.</p>
-
-<p>The Beam project has a strong focus on development process and tooling, including
testing. For the runners, there is a comprehensive test suite with more than 200 integration
tests that are executed against each runner to ensure they don’t break as changes are made.
The tests cover the capabilities of the matrix and thus are a measure of completeness and
correctness of the runner implementations. The suite was very helpful when developing the
Apex runner.</p>
-
-<h2 id="outlook">Outlook</h2>
-
-<p>The next step is to take the Apex runner from functional to ready for real applications
that run distributed, leveraging the scalability and performance features of Apex, similar
to its native API. This includes chaining of ParDos, partitioning, optimizing combine operations
etc. To get involved, please see <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20and%20component%20%3D%20runner-apex%20and%20resolution%20%3D%20unresolved">JIRA</a>
and join the Beam community.</p>
-
-  </div>
-
-</article>
-
-      </div>
-
-
-    <hr>
-  <div class="row">
-      <div class="col-xs-12">
-          <footer>
-              <p class="text-center">&copy; Copyright 2016
-                <a href="http://www.apache.org">The Apache Software Foundation.</a>
All Rights Reserved.</p>
-                <p class="text-center"><a href="/privacy_policy">Privacy Policy</a>
|
-                <a href="/feed.xml">RSS Feed</a></p>
-          </footer>
-      </div>
-  </div>
-  <!-- container div end -->
-</div>
-
-
-  </body>
-
-</html>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/ba5fe2b2/content/blog/2016/01/09/added-apex-runner.html
----------------------------------------------------------------------
diff --git a/content/blog/2016/01/09/added-apex-runner.html b/content/blog/2016/01/09/added-apex-runner.html
new file mode 100644
index 0000000..3d0ab40
--- /dev/null
+++ b/content/blog/2016/01/09/added-apex-runner.html
@@ -0,0 +1,211 @@
+<!DOCTYPE html>
+<html lang="en">
+
+  <head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Release 0.4.0 adds a runner for Apache Apex</title>
+  <meta name="description" content="The latest release 0.4.0 of Apache Beam adds a new
runner for Apache Apex. We are excited to reach this initial milestone and are looking forward
to continue...">
+
+  <link rel="stylesheet" href="/styles/site.css">
+  <link rel="stylesheet" href="/css/theme.css">
+  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script src="/js/language-switch.js"></script>
+  <link rel="canonical" href="http://beam.apache.org/blog/2016/01/09/added-apex-runner.html"
data-proofer-ignore>
+  <link rel="alternate" type="application/rss+xml" title="Apache Beam" href="http://beam.apache.org/feed.xml">
+  <script>
+    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+    })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+    ga('create', 'UA-73650088-1', 'auto');
+    ga('send', 'pageview');
+
+  </script>
+  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
+</head>
+
+
+  <body role="document">
+
+    <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <a href="/" class="navbar-brand" >
+        <img alt="Brand" style="height: 25px" src="/images/beam_logo_navbar.png">
+      </a>
+      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"
aria-expanded="false" aria-controls="navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+    </div>
+    <div id="navbar" class="navbar-collapse collapse">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Get Started <span class="caret"></span></a>
+		  <ul class="dropdown-menu">
+			  <li><a href="/get-started/beam-overview/">Beam Overview</a></li>
+              <li><a href="/get-started/quickstart/">Quickstart</a></li>
+			  <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Example Walkthroughs</li>
+			  <li><a href="/get-started/wordcount-example/">WordCount</a></li>
+			  <li><a href="/get-started/mobile-gaming-example/">Mobile Gaming</a></li>
+              <li role="separator" class="divider"></li>
+              <li class="dropdown-header">Resources</li>
+              <li><a href="/get-started/downloads">Downloads</a></li>
+              <li><a href="/get-started/support">Support</a></li>
+		  </ul>
+	    </li>
+        <li class="dropdown">
+		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Documentation <span class="caret"></span></a>
+		  <ul class="dropdown-menu">
+			  <li><a href="/documentation">Using the Documentation</a></li>
+			  <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Beam Concepts</li>
+			  <li><a href="/documentation/programming-guide/">Programming Guide</a></li>
+			  <li><a href="/documentation/resources/">Additional Resources</a></li>
+			  <li role="separator" class="divider"></li>
+              <li class="dropdown-header">Pipeline Fundamentals</li>
+              <li><a href="/documentation/pipelines/design-your-pipeline/">Design
Your Pipeline</a></li>
+              <li><a href="/documentation/pipelines/create-your-pipeline/">Create
Your Pipeline</a></li>
+              <li><a href="/documentation/pipelines/test-your-pipeline/">Test
Your Pipeline</a></li>
+              <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">SDKs</li>
+			  <li><a href="/documentation/sdks/java/">Java SDK</a></li>
+			  <li><a href="/documentation/sdks/javadoc/0.4.0/" target="_blank">Java SDK
API Reference <img src="/images/external-link-icon.png"
+                 width="14" height="14"
+                 alt="External link."></a>
+        </li>
+        <li><a href="/documentation/sdks/python/">Python SDK</a></li>
+			  <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Runners</li>
+			  <li><a href="/documentation/runners/capability-matrix/">Capability Matrix</a></li>
+			  <li><a href="/documentation/runners/direct/">Direct Runner</a></li>
+			  <li><a href="/documentation/runners/apex/">Apache Apex Runner</a></li>
+			  <li><a href="/documentation/runners/flink/">Apache Flink Runner</a></li>
+			  <li><a href="/documentation/runners/spark/">Apache Spark Runner</a></li>
+			  <li><a href="/documentation/runners/dataflow/">Cloud Dataflow Runner</a></li>
+		  </ul>
+	    </li>
+        <li class="dropdown">
+		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Contribute <span class="caret"></span></a>
+		  <ul class="dropdown-menu">
+			  <li><a href="/contribute">Get Started Contributing</a></li>
+        <li role="separator" class="divider"></li>
+        <li class="dropdown-header">Guides</li>
+			  <li><a href="/contribute/contribution-guide/">Contribution Guide</a></li>
+        <li><a href="/contribute/testing/">Testing Guide</a></li>
+        <li><a href="/contribute/release-guide/">Release Guide</a></li>
+        <li role="separator" class="divider"></li>
+        <li class="dropdown-header">Technical References</li>
+        <li><a href="/contribute/design-principles/">Design Principles</a></li>
+			  <li><a href="/contribute/work-in-progress/">Ongoing Projects</a></li>
+        <li><a href="/contribute/source-repository/">Source Repository</a></li>
     
+        <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Promotion</li>
+        <li><a href="/contribute/presentation-materials/">Presentation Materials</a></li>
+        <li><a href="/contribute/logos/">Logos and Design</a></li>
+        <li role="separator" class="divider"></li>
+        <li><a href="/contribute/maturity-model/">Maturity Model</a></li>
+        <li><a href="/contribute/team/">Team</a></li>
+		  </ul>
+	    </li>
+
+        <li><a href="/blog">Blog</a></li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false"><img src="https://www.apache.org/foundation/press/kit/feather_small.png"
alt="Apache Logo" style="height:24px;">Apache Software Foundation<span class="caret"></span></a>
+          <ul class="dropdown-menu dropdown-menu-right">
+            <li><a href="http://www.apache.org/">ASF Homepage</a></li>
+            <li><a href="http://www.apache.org/licenses/">License</a></li>
+            <li><a href="http://www.apache.org/security/">Security</a></li>
+            <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+            <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+            <li><a href="https://www.apache.org/foundation/policies/conduct">Code
of Conduct</a></li>
+          </ul>
+        </li>
+      </ul>
+    </div><!--/.nav-collapse -->
+  </div>
+</nav>
+
+
+<link rel="stylesheet" href="">
+
+
+    <div class="container" role="main">
+
+      <div class="row">
+        
+
+<article class="post" itemscope itemtype="http://schema.org/BlogPosting">
+
+  <header class="post-header">
+    <h1 class="post-title" itemprop="name headline">Release 0.4.0 adds a runner for
Apache Apex</h1>
+    <p class="post-meta"><time datetime="2016-01-09T09:00:01-08:00" itemprop="datePublished">Jan
9, 2016</time> •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
+</p>
+  </header>
+
+  <div class="post-content" itemprop="articleBody">
+    <p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a>
adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are
excited to reach this initial milestone and are looking forward to continued collaboration
between the Beam and Apex communities to advance the runner.</p>
+
+<!--more-->
+
+<p>Beam evolved from the Google Dataflow SDK and as incubator project has quickly adapted
the Apache way, grown the community and attracts increasing interest from users that hope
to benefit from a conceptual strong unified programming model that is portable between different
big data processing frameworks (see <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101">Streaming-101</a>
and <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102">Streaming-102</a>).
Multiple Apache projects already provide runners for Beam (see <a href="http://beam.apache.org/documentation/runners/capability-matrix/">runners
and capabilities matrix</a>).</p>
+
+<p>Apex is a stream processing framework for low-latency, high-throughput, stateful
and reliable processing of complex analytics pipelines on clusters. Apex was developed since
2012 and is used in production by large companies for real-time and batch processing at scale.</p>
+
+<p>The initial revision of the runner was focussed on broad coverage of the Beam model
on a functional level. That means, there will be follow up work in several areas to take the
runner from functional to scalable and high performance to match the capabilities of Apex
and its native API. The runner capabilities matrix shows that the Apex capabilities are well
aligned with the Beam model. Specifically, the ability to track computational state in a fault
tolerant and efficient manner is needed to broadly support the windowing concepts, including
event time based processing.</p>
+
+<h2 id="stateful-stream-processor">Stateful Stream Processor</h2>
+
+<p>Apex was built as stateful stream processor from the ground up. Operators <a
href="https://www.datatorrent.com/blog/blog-introduction-to-checkpoint/">checkpoint</a>
state in a distributed and asynchronous manner that produces a consistent snapshot for the
entire processing graph, which can be used for recovery. Apex also supports such recovery
in an incremental, or fine grained, manner. This means only the portion of the DAG that is
actually affected by a failure will be recovered while the remaining pipeline continues processing
(this can be leveraged to implement use cases with special needs, such as speculative execution
to achieve SLA on the processing latency). The state checkpointing along with idempotent processing
guarantee is the basis for <a href="https://www.datatorrent.com/blog/end-to-end-exactly-once-with-apache-apex/">exactly-once
results</a> support in Apex.</p>
+
+<h2 id="translation-to-apex-dag">Translation to Apex DAG</h2>
+
+<p>A Beam runner needs to implement the translation from the Beam model to the underlying
frameworks execution model. In the case of Apex, the runner will translate the pipeline into
the <a href="https://www.datatorrent.com/blog/tracing-dags-from-specification-to-execution/">native
(compositional, low level) DAG API</a> (which is also the base for a number of other
API that are available to specify applications that run on Apex). The DAG consists of operators
(functional building blocks that are connected with streams. The runner provides the execution
layer. In the case of Apex it is distributed stream processing, operators process data event
by event. The minimum set of operators covers Beam’s primitive transforms: <code class="highlighter-rouge">ParDo.Bound</code>,
 <code class="highlighter-rouge">ParDo.BoundMulti</code>, <code class="highlighter-rouge">Read.Unbounded</code>,
<code class="highlighter-rouge">Read.Bounded</code>, <code class="highlighter-rouge">GroupByKey</code>,
  <code class="highlighter-rouge">Flatten.FlattenPCollectionList</code> etc.</p>
+
+<h2 id="execution-and-testing">Execution and Testing</h2>
+
+<p>In this release, the Apex runner executes the pipelines in embedded mode, where,
similar to the direct runner, everything is executed in a single JVM. See <a href="https://beam.apache.org/get-started/quickstart/">quickstart</a>
on how to run the Beam examples with the Apex runner.</p>
+
+<p>Embedded mode is useful for development and debugging. Apex in production runs distributed
on Apache Hadoop YARN clusters. An example how a Beam pipeline can be embedded into an Apex
application package to run on YARN can be found <a href="https://github.com/tweise/apex-samples/tree/master/beam-apex-wordcount">here</a>
and support for direct launch in the runner is currently being worked on.</p>
+
+<p>The Beam project has a strong focus on development process and tooling, including
testing. For the runners, there is a comprehensive test suite with more than 200 integration
tests that are executed against each runner to ensure they don’t break as changes are made.
The tests cover the capabilities of the matrix and thus are a measure of completeness and
correctness of the runner implementations. The suite was very helpful when developing the
Apex runner.</p>
+
+<h2 id="outlook">Outlook</h2>
+
+<p>The next step is to take the Apex runner from functional to ready for real applications
that run distributed, leveraging the scalability and performance features of Apex, similar
to its native API. This includes chaining of ParDos, partitioning, optimizing combine operations
etc. To get involved, please see <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20and%20component%20%3D%20runner-apex%20and%20resolution%20%3D%20unresolved">JIRA</a>
and join the Beam community.</p>
+
+  </div>
+
+</article>
+
+      </div>
+
+
+    <hr>
+  <div class="row">
+      <div class="col-xs-12">
+          <footer>
+              <p class="text-center">&copy; Copyright 2016
+                <a href="http://www.apache.org">The Apache Software Foundation.</a>
All Rights Reserved.</p>
+                <p class="text-center"><a href="/privacy_policy">Privacy Policy</a>
|
+                <a href="/feed.xml">RSS Feed</a></p>
+          </footer>
+      </div>
+  </div>
+  <!-- container div end -->
+</div>
+
+
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/ba5fe2b2/content/blog/index.html
----------------------------------------------------------------------
diff --git a/content/blog/index.html b/content/blog/index.html
index 40c3b69..7ac5976 100644
--- a/content/blog/index.html
+++ b/content/blog/index.html
@@ -337,8 +337,8 @@ Read more&nbsp;<span class="glyphicon glyphicon-menu-right" aria-hidden="true"><
 
 <hr />
 
-<h3 id="a-classpost-link-hrefblog20160108added-apex-runnerhtmlrelease-040-adds-a-runner-for-apache-apexa"><a
class="post-link" href="/blog/2016/01/08/added-apex-runner.html">Release 0.4.0 adds a runner
for Apache Apex</a></h3>
-<p><i>Jan 8, 2016 •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
+<h3 id="a-classpost-link-hrefblog20160109added-apex-runnerhtmlrelease-040-adds-a-runner-for-apache-apexa"><a
class="post-link" href="/blog/2016/01/09/added-apex-runner.html">Release 0.4.0 adds a runner
for Apache Apex</a></h3>
+<p><i>Jan 9, 2016 •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
 </i></p>
 
 <p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a>
adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are
excited to reach this initial milestone and are looking forward to continued collaboration
between the Beam and Apex communities to advance the runner.</p>
@@ -346,7 +346,7 @@ Read more&nbsp;<span class="glyphicon glyphicon-menu-right" aria-hidden="true"><
 <!-- Render a "read more" button if the post is longer than the excerpt -->
 
 <p>
-<a class="btn btn-default btn-sm" href="/blog/2016/01/08/added-apex-runner.html" role="button">
+<a class="btn btn-default btn-sm" href="/blog/2016/01/09/added-apex-runner.html" role="button">
 Read more&nbsp;<span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span>
 </a>
 </p>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/ba5fe2b2/content/index.html
----------------------------------------------------------------------
diff --git a/content/index.html b/content/index.html
index 57f728d..f116c6a 100644
--- a/content/index.html
+++ b/content/index.html
@@ -192,7 +192,7 @@
     
     <a class="list-group-item" href="/beam/update/website/2016/02/22/beam-has-a-logo.html">Feb
22, 2016 - Apache Beam has a logo!</a>
     
-    <a class="list-group-item" href="/blog/2016/01/08/added-apex-runner.html">Jan 8,
2016 - Release 0.4.0 adds a runner for Apache Apex</a>
+    <a class="list-group-item" href="/blog/2016/01/09/added-apex-runner.html">Jan 9,
2016 - Release 0.4.0 adds a runner for Apache Apex</a>
     
     </div>
   </div>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/ba5fe2b2/src/_posts/2017-01-09-added-apex-runner.md
----------------------------------------------------------------------
diff --git a/src/_posts/2017-01-09-added-apex-runner.md b/src/_posts/2017-01-09-added-apex-runner.md
index 93c00ed..fa02080 100644
--- a/src/_posts/2017-01-09-added-apex-runner.md
+++ b/src/_posts/2017-01-09-added-apex-runner.md
@@ -1,7 +1,7 @@
 ---
 layout: post
 title:  "Release 0.4.0 adds a runner for Apache Apex"
-date:   2016-01-09 00:00:01 -0700
+date:   2016-01-09 10:00:01 -0700
 excerpt_separator: <!--more-->
 categories: blog
 authors:


Mime
View raw message