streams-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sblack...@apache.org
Subject svn commit: r1787661 [7/14] - in /incubator/streams/site/trunk/content/site/0.5-incubating/streams-project: ./ credentials/ css/ fonts/ images/ images/logos/ images/profiles/ img/ install/ js/ services/
Date Sun, 19 Mar 2017 20:40:28 GMT
Added: incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/faq.html
URL: http://svn.apache.org/viewvc/incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/faq.html?rev=1787661&view=auto
==============================================================================
--- incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/faq.html (added)
+++ incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/faq.html Sun
Mar 19 20:40:26 2017
@@ -0,0 +1,457 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.7 at 2017-03-19 
+ | Rendered using Apache Maven Fluido Skin 1.5
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20170319" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Apache Streams &#x2013; Frequently Asked Questions</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido-1.5.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+
+      
+    <script type="text/javascript" src="./js/apache-maven-fluido-1.5.min.js"></script>
+
+          <script>
+                (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+                (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+                m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+                })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
+
+                ga('create', 'UA-77087101-1', 'auto');
+                ga('send', 'pageview');
+            </script>
+                      </head>
+        <body class="topBarEnabled">
+          
+    
+    
+            
+    
+        
+    <a href="https://github.com/apache/incubator-streams">
+      <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
+        src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png"
+        alt="Fork me on GitHub">
+    </a>
+  
+                        
+                    
+                
+
+    <div id="topbar" class="navbar navbar-fixed-top navbar-inverse">
+      <div class="navbar-inner">
+                                  <div class="container"><div class="nav-collapse">
+            
+                
+                                <ul class="nav">
+                          <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">Overview <b class="caret"></b></a>
+        <ul class="dropdown-menu">
+        
+                      <li>      <a href="index.html"  title="Overview">Overview</a>
+</li>
+                  
+                      <li>      <a href="architecture.html"  title="Architecture">Architecture</a>
+</li>
+                  
+                      <li>      <a href="concepts.html"  title="Concepts">Concepts</a>
+</li>
+                  
+                      <li>      <a href="downloads.html"  title="Downloads">Downloads</a>
+</li>
+                  
+                      <li>      <a href="../../latest/streams-examples/"  title="Examples">Examples</a>
+</li>
+                  
+                      <li>      <a href="faq.html"  title="Frequently Asked Questions">Frequently
Asked Questions</a>
+</li>
+                  
+                      <li>      <a href="modules.html"  title="Modules">Modules</a>
+</li>
+                          </ul>
+      </li>
+                <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">Users <b class="caret"></b></a>
+        <ul class="dropdown-menu">
+        
+                      <li>      <a href="install/java.html"  title="Install Java">Install
Java</a>
+</li>
+                  
+                      <li>      <a href="install/docker.html"  title="Install Docker">Install
Docker</a>
+</li>
+                  
+                      <li>      <a href="install/sbt.html"  title="Install SBT">Install
SBT</a>
+</li>
+                  
+                      <li class="dropdown-submenu">
+                                      <a href=""  title="Credentials">Credentials</a>
+              <ul class="dropdown-menu">
+                                  <li>      <a href="credentials/twitter.html" 
title="Twitter">Twitter</a>
+</li>
+                              </ul>
+            </li>
+                  
+                      <li class="dropdown-submenu">
+                                      <a href=""  title="Services">Services</a>
+              <ul class="dropdown-menu">
+                                  <li>      <a href="services/cassandra.html"  title="Cassandra">Cassandra</a>
+</li>
+                                  <li>      <a href="services/elasticsearch.html"
 title="Elasticsearch">Elasticsearch</a>
+</li>
+                                  <li>      <a href="services/hbase.html"  title="HBase">HBase</a>
+</li>
+                                  <li>      <a href="services/mongo.html"  title="Mongo">Mongo</a>
+</li>
+                                  <li>      <a href="services/neo4j.html"  title="Neo4j">Neo4j</a>
+</li>
+                              </ul>
+            </li>
+                          </ul>
+      </li>
+                <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers <b
class="caret"></b></a>
+        <ul class="dropdown-menu">
+        
+                      <li>      <a href="install/git.html"  title="Install Git">Install
Git</a>
+</li>
+                  
+                      <li>      <a href="install/maven.html"  title="Install Maven">Install
Maven</a>
+</li>
+                  
+                      <li>      <a href="changelog.html"  title="Changelog">Changelog</a>
+</li>
+                  
+                      <li>      <a href="release-setup.html"  title="Release Setup">Release
Setup</a>
+</li>
+                  
+                      <li>      <a href="release.html"  title="Release Process">Release
Process</a>
+</li>
+                  
+                      <li>      <a href="website.html"  title="Website Management">Website
Management</a>
+</li>
+                  
+                      <li>      <a href="code-conventions.html"  title="Coding Conventions">Coding
Conventions</a>
+</li>
+                          </ul>
+      </li>
+                <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">Reports <b class="caret"></b></a>
+        <ul class="dropdown-menu">
+        
+                      <li class="dropdown-submenu">
+                                      <a href="project-info.html"  title="Project Information">Project
Information</a>
+              <ul class="dropdown-menu">
+                                  <li>      <a href="index.html"  title="About">About</a>
+</li>
+                                  <li>      <a href="modules.html"  title="Project
Modules">Project Modules</a>
+</li>
+                                  <li>      <a href="license.html"  title="Licenses">Licenses</a>
+</li>
+                                  <li>      <a href="mail-lists.html"  title="Mailing
Lists">Mailing Lists</a>
+</li>
+                                  <li>      <a href="team-list.html"  title="Team">Team</a>
+</li>
+                                  <li>      <a href="source-repository.html"  title="Source
Code Management">Source Code Management</a>
+</li>
+                                  <li>      <a href="issue-tracking.html"  title="Issue
Management">Issue Management</a>
+</li>
+                                  <li>      <a href="dependencies.html"  title="Dependencies">Dependencies</a>
+</li>
+                                  <li>      <a href="dependency-info.html"  title="Dependency
Information">Dependency Information</a>
+</li>
+                                  <li>      <a href="dependency-management.html"
 title="Dependency Management">Dependency Management</a>
+</li>
+                                  <li>      <a href="distribution-management.html"
 title="Distribution Management">Distribution Management</a>
+</li>
+                                  <li>      <a href="plugin-management.html"  title="Plugin
Management">Plugin Management</a>
+</li>
+                              </ul>
+            </li>
+                  
+                      <li class="dropdown-submenu">
+                                      <a href="project-reports.html"  title="Project Reports">Project
Reports</a>
+              <ul class="dropdown-menu">
+                                  <li>      <a href="checkstyle.html"  title="Checkstyle">Checkstyle</a>
+</li>
+                                  <li>      <a href="checkstyle-aggregate.html"
 title="Checkstyle">Checkstyle</a>
+</li>
+                                  <li>      <a href="rat-report.html"  title="Rat
Report">Rat Report</a>
+</li>
+                              </ul>
+            </li>
+                          </ul>
+      </li>
+                <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">Other Resources
<b class="caret"></b></a>
+        <ul class="dropdown-menu">
+        
+                      <li>      <a href="http://activitystrea.ms"  title="ActivityStreams
Homepage">ActivityStreams Homepage</a>
+</li>
+                  
+                      <li>      <a href="http://activitystrea.ms/specs/"  title="ActivityStreams
Specifications">ActivityStreams Specifications</a>
+</li>
+                  
+                      <li>      <a href="http://blogs.apache.org/streams/"  title="Streams
Blog">Streams Blog</a>
+</li>
+                  
+                      <li>      <a href="http://wiki.apache.org/incubator/StreamsProposal"
 title="Streams Proposal Wiki Page">Streams Proposal Wiki Page</a>
+</li>
+                  
+                      <li>      <a href="http://www.apache.org/"  title="Apache
Foundation Info">Apache Foundation Info</a>
+</li>
+                  
+                      <li>      <a href="http://www.apache.org/foundation/sponsorship.html"
 title="Sponsorship">Sponsorship</a>
+</li>
+                  
+                      <li>      <a href="http://www.apache.org/foundation/thanks.html"
 title="Thanks">Thanks</a>
+</li>
+                          </ul>
+      </li>
+                  </ul>
+          
+          
+                                                    
+        
+        
+        <ul class="nav pull-right"><li>
+    
+    <a href="https://twitter.com/ApacheStreams" class="twitter-follow-button" data-show-count="true"
data-align="right" data-size="large" data-show-screen-name="true" data-lang="en">Follow
ApacheStreams</a>
+    <script type="text/javascript">!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0];if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src="//platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+
+        </li></ul>
+                              
+                   
+                      </div>
+          
+        </div>
+      </div>
+    </div>
+    
+        <div class="container">
+          <div id="banner">
+        <div class="pull-left">
+                                                  <a href="../../.." id="bannerLeft">
+                                                                                        
       <img src="../../../images/streams_logo.jpg"  alt="Apache Streams" width="150" height="100"/>
+                </a>
+                      </div>
+        <div class="pull-right">                  <a href="http://incubator.apache.org"
id="bannerRight">
+                                                                                        <img
src="http://incubator.apache.org/images/egg-logo.png"  alt="Apache Incubator"/>
+                </a>
+      </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+              
+                              <li class="">
+                    <a href="http://incubator.apache.org/" class="externalLink" title="Incubator">
+        Incubator</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../../latest/streams-project" title="Streams">
+        Streams</a>
+                    <span class="divider">/</span>
+      </li>
+        <li class="active ">Frequently Asked Questions</li>
+        
+              
+      
+                            </ul>
+      </div>
+
+      
+                
+        <div id="bodyColumn" >
+                                  
+            <div class="section">
+<h2><a name="Frequently_Asked_Questions"></a>Frequently Asked Questions</h2>
+<div class="section">
+<h3><a name="Why_should_I_adopt_activity_streams_for_my_project"></a>Why
should I adopt activity streams for my project?</h3>
+<p>Odds are the dataset you are working with is some combination of timestamped events
and observations of entities and their relationships at various points in time. Activity Streams
provides a simple yet powerful standard format for these types of data, regardless of their
origin, publisher, or specific details. Activity Streams is a community-driven specification
designed for interoperability and flexibility. By supporting activity streams you maximize
the chance that a new data-source of interest to you will be compatible with your existing
data, and that your data will be compatible with that of other communities working on similar
projects. </p></div>
+<div class="section">
+<h3><a name="What_organizations_exchange_data_in_activity_streams_formats"></a>What
organizations exchange data in activity streams formats?</h3>
+<p>A short list of organizations and products that support activity streams format
is compiled <a href="who.html" title="here">here</a>.</p>
+<p>If your organization supports activity streams, please let us know on the project
mailing list.</p></div>
+<div class="section">
+<h3><a name="Why_should_I_consider_using_Apache_Streams_for_my_project"></a>Why
should I consider using Apache Streams for my project?</h3>
+<p>If you are working with structured event and/or entity data that fits the Activity
Streams model, and working with a JVM language, Apache Streams can simplify many of the challenging
aspects involved with these types of projects. For example:</p>
+
+<ul>
+  
+<li>Keeping track of the original source of each piece of information</li>
+  
+<li>Harmonizing a multitude of date-time formats</li>
+  
+<li>Moving between JSON, XML, YAML, and binary serializations</li>
+  
+<li>Writing processing logic that can run in both batch and real-time workflows</li>
+  
+<li>Defining constraints and validation rules for up-stream (third-party) and in-stream
(your sphere of control) data</li>
+  
+<li>Supplying run-time configuration globally and per-stream-component in a sensible
manner</li>
+</ul></div>
+<div class="section">
+<h3><a name="What_does_Apache_Streams_actually_do"></a>What does Apache
Streams actually do?</h3>
+<p>Apache Streams is</p>
+
+<ul>
+  
+<li>an SDK for data-centric JVM software</li>
+  
+<li>a set of modules that connect data-providing APIs and data-persisting analytical
systems</li>
+  
+<li>a community working to make web and enterprise datasets interoperable by default</li>
+</ul>
+<p>Apache Streams is not</p>
+
+<ul>
+  
+<li>one-size-fits-all</li>
+  
+<li>prescriptive or opinionated about how it should be used</li>
+  
+<li>only useful for projects fully dedicated to activity streams datasets</li>
+</ul>
+<p>The primary Streams git repository incubator-streams (org.apache.streams:streams-project)
contains</p>
+
+<ul>
+  
+<li>core interfaces and utilities</li>
+  
+<li>plugins for transforming schemas into source code and other artifacts</li>
+  
+<li>a library of modules for acquiring, transforming, and enriching data streams.</li>
+</ul>
+<p>Similar modules can also be hosted externally - so long as they publish maven artifacts
compatible with your version of streams, you can import and use them in your streams easily.</p>
+<p>The streams community also supports a separate repository incubator-streams-examples
(org.apache.streams:streams-examples) which contains a library of simple streams that are
&#x2018;ready-to-run&#x2019;. Look here to see what Streams user code looks like.</p></div>
+<div class="section">
+<h3><a name="Why_bother_with_any_data_framework_at_all"></a>Why bother
with any data framework at all?</h3>
+<p>Why use Postgres, Elasticsearch, Cassandra, Hadoop, Linux, or Java?</p>
+<p>Frameworks make important but boring parts of systems and code just work so your
team can focus on features important to your users.</p>
+<p>If you are sure you can write code that is some combination of faster, more readable,
better tested, easier to learn, easier to build with, or more maintainable than any existing
framework (including Streams), maybe you should.</p>
+<p>But you are probably under-estimating how difficult it will be to optimize across
all of these considerations, stay current with upgrades to underlying libraries, and fix whatever
bugs are discovered.</p>
+<p>Or maybe you are capable of doing it all flawlessly, but your time is just more
valuable focused on your product rather than on plumbing.</p>
+<p>By joining forces with others who care about clean running water, everyone can run
better, faster, stronger code assembled with more diverse expertise, tested and tuned under
more use cases.</p></div>
+<div class="section">
+<h3><a name="How_is_streams_different_than_processing_framework"></a>How
is streams different than &#x201c;<i>processing framework</i>&#x201d;?</h3>
+<p>You don&#x2019;t have to look hard to find great data processing frameworks
for batch or for real-time. Pig, Hive, Storm, Spark, Samza, Flink, and Google Cloud Dataflow
(soon-to-be Apache Beam) are all great. Apex and NiFi are interesting newer options. This
list only includes Apache Foundation JVM projects!</p>
+<p>At the core these platforms help you connect inputs and outputs to a directed graph
of computation, and run your code at scale.</p>
+<p>Streams uses this computational model as well, but is more focused on intelligently
and correctly modeling the data that will flow through the stream than on stream execution.
In this sense Streams is an alternative to avro or protocol buffers - one which prioritizes
flexibility, expressivity, interoperability, and tooling ahead of speed or compute efficiency.</p>
+<p>Streams seeks to make it easy to design and evolve streams, and to configure complex
streams sensibly. Where many processing frameworks leave all business logic and configuration
issues to the developer, streams modules are designed to mix-and-match. Streams modules expect
to be embedded with other frameworks and are organized to make that process painless.</p>
+<p>Streams also contains a library of plug-and-play data providers to collect and normalize
data from a variety of popular sources.</p></div>
+<div class="section">
+<h3><a name="How_do_I_deploy_Streams"></a>How do I deploy Streams?</h3>
+<p>Currently you cannot deploy Streams (uppercase). Streams has no shrink-wrapped ready-to-run
server process. You can however deploy streams (lowercase). The right method for packaging,
deploying, and running streams depends on what runtime you are going to use.</p>
+<p>Streams includes a local runtime that uses multi-threaded execution and blocking
queues within a single process. In this scenario you build an uberjar with few exclusions
and ship it to a target environment however you want - maven, scp, docker, etc&#x2026;
You launch the stream process with an appropriate configuration and watch the magic / catastrophic
fail.</p>
+<p>Alternatively, components written to streams interfaces can be bound within other
platforms such as pig or spark. In this scenario, you build an uberjar that excludes the platform
parts of the classpath and launch your stream using the launch style of that platform.</p></div>
+<div class="section">
+<h3><a name="Cant_I_just_dump_source_data_directly_into_files_or_databases"></a>Can&#x2019;t
I just dump source data directly into files or databases?</h3>
+<p>Absolutely - and that will work great right up until the point where the requirements,
the tools, or the way you want to index your data need to change.</p></div>
+<div class="section">
+<h3><a name="What_if_I_need_data_from_specific_API"></a>What if I need
data from &#x201c;<i>specific API</i>&#x201d;?</h3>
+<p>No problem - anyone can write a Streams provider. The project contains providers
that use a variety of strategies to generate near-real-time data streams, including:</p>
+
+<ul>
+  
+<li>sockets</li>
+  
+<li>webhooks</li>
+  
+<li>polling</li>
+  
+<li>scraping</li>
+</ul>
+<p>Providers can run continuously and pass-through new data, or they can work sequentially
through a backlog of items. If you need to collect so many items that you can&#x2019;t
fit all of their ids in the memory available to your stream, it&#x2019;s pretty simple
to sub-divide your backlog into small batches and launch a series of providers for collection
using frameworks such as Flink or Spark Streaming.</p></div>
+<div class="section">
+<h3><a name="What_if_I_want_to_keep_data_in_unsupported_database"></a>What
if I want to keep data in &#x201c;<i>unsupported database</i>&#x201d;?</h3>
+<p>No problem - anyone can write a Streams persist reader or persist writer. The project
contains persist writers that:</p>
+
+<ul>
+  
+<li>write documents efficiently with batch-style binary indexing</li>
+  
+<li>write documents one-by-one to services with REST api endpoints</li>
+  
+<li>write data to local or distributed buffers.</li>
+</ul>
+<p>If you just want to use streams providers to collect and feed incoming data into
a queueing system to work with outside of streams, that&#x2019;s just fine.</p></div>
+<div class="section">
+<h3><a name="Cant_I_just_use_third-party_SDK_to_do_the_same_thing"></a>Can&#x2019;t
I just use &#x201c;<i>third-party SDK</i>&#x201d; to do the same thing?</h3>
+<p>Describe any specific data collection, processing, or storage function and there
are probably several if not tens of basic implementations on GitHub. There may even be language-specific
libraries published by a vendor with a commercial interest in a related technology.</p>
+<p>However, in general there is a set of tradeoffs involved when relying on these
packages.</p>
+
+<ul>
+  
+<li>They often have transitive dependencies.</li>
+  
+<li>They may not use performant HTTP and JSON libraries.</li>
+  
+<li>The object representations and lifecycle mechanisms they provide may not be consistent
with the rest of your code.</li>
+  
+<li>They may source configuration properties in a problematic or cumbersome fashion.</li>
+  
+<li>Their licenses may be restrictive or undocumented.</li>
+</ul>
+<p>Streams goes to great lengths to regularize many of these issues so that they are
uniform across project modules, and easy to reuse within new and external modules.</p>
+<p>Where quality java libraries exist, their most useful parts may be included within
a streams module, with unnecessary or difficult parts of their dependency tree excluded.</p></div>
+<div class="section">
+<h3><a name="Where_do_I_start"></a>Where do I start?</h3>
+<p>Work your way through the &#x2018;Tutorial&#x2019; menu to get up and running
with streams.</p>
+<p>Then browse the &#x2018;Other Resources&#x2019; menu to learn more about
how streams works and why.</p></div>
+<div class="section">
+<h3><a name="How_can_I_help"></a>How can I help?</h3>
+
+<ul>
+  
+<li>Join our mailing list.</li>
+  
+<li>Ask questions and suggest features.</li>
+  
+<li>Contribute to the documentation in one of the streams repositories.</li>
+  
+<li>Write a new provider using an existing provider as a template.</li>
+  
+<li>Add new features (and / or tests) to an existing module you intend to use.</li>
+  
+<li>Build and contribute a new example.</li>
+</ul>
+<div class="section">
+<div class="section">
+<div class="section">
+<h6><a name="Licensed_under_Apache_License_2.0_-_http:www.apache.orglicensesLICENSE-2.0"></a>Licensed
under Apache License 2.0 - <a class="externalLink" href="http://www.apache.org/licenses/LICENSE-2.0">http://www.apache.org/licenses/LICENSE-2.0</a></h6></div></div></div></div></div>
+                  </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container">
+                      <div class="row">
+                                      <p >Copyright &copy;                    2017
+                        <a href="https://www.apache.org/">The Apache Software Foundation</a>.
+            All rights reserved.    
+                  <li id="publishDate" class="pull-right">Last Published: 2017-03-19</li>
+              <li id="projectVersion" class="pull-right">
+                    Version: 0.5-incubating
+        </li>
+            </p>
+                </div>
+
+                <p id="poweredBy" class="pull-right">
+                          <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png"
/>
+      </a>
+              </p>
+        
+                </div>
+    </footer>
+        </body>
+</html>
\ No newline at end of file

Added: incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/fonts/glyphicons-halflings-regular.eot
URL: http://svn.apache.org/viewvc/incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/fonts/glyphicons-halflings-regular.eot?rev=1787661&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/streams/site/trunk/content/site/0.5-incubating/streams-project/fonts/glyphicons-halflings-regular.eot
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



Mime
View raw message