Engine Development - Troubleshooting

Apache PredictionIO (incubating) provides the following features to help you debug engines during the development cycle.

Stop Training between Stages

By default, pio train runs through the whole training process, including DataSource, Preparator and Algorithm. To speed up the development and debugging cycle, you can stop the process after each stage to verify it has completed correctly.

If you have modified DataSource and want to confirm the TrainingData is generated as expected, you can run pio train with the --stop-after-read option:

pio train --stop-after-read

This would stop the training process after the TrainingData is generated.

For example, if you are running the Recommendation Template, you should see the training process stop after the TrainingData is printed.

[INFO] [CoreWorkflow$] TrainingData:
[INFO] [CoreWorkflow$] ratings: [1501] (List(Rating(3,0,4.0), Rating(3,1,4.0))...)
...
[INFO] [CoreWorkflow$] Training interrupted by org.apache.predictionio.workflow.StopAfterReadInterruption.

Similarly, you can stop the training after the Preparator phase by using the --stop-after-prepare option, which stops after PreparedData is generated:

pio train --stop-after-prepare

Sanity Check

You can extend the SanityCheck trait and implement the sanityCheck() method with your error-checking code. sanityCheck() is called when the data is generated. This can be applied to the TrainingData, PreparedData and Model classes, which are the outputs of DataSource's readTraining(), Preparator's prepare() and Algorithm's train() methods, respectively.

For example, one frequent error with the Recommendation Template is that the TrainingData is empty because the DataSource is not reading data correctly. You can add a check for empty data inside the sanityCheck() function, and you can easily add other checking logic based on your own needs. Also, if you implement the toString() method in your TrainingData, you can call toString() inside sanityCheck() to print out some data for visual inspection.

For example, to print TrainingData to the console and check whether ratings is empty, you can do the following:

import org.apache.predictionio.controller.SanityCheck // ADDED

class TrainingData(
  val ratings: RDD[Rating]
) extends Serializable with SanityCheck { // EXTEND SanityCheck
  override def toString = {
    s"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)"
  }

  // IMPLEMENT sanityCheck()
  override def sanityCheck(): Unit = {
    println(toString())
    // add your other checking here
    require(!ratings.take(1).isEmpty, s"ratings cannot be empty!")
  }
}
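The same pattern applies to PreparedData and the Model class. Below is a minimal sketch for PreparedData, assuming it simply wraps the same ratings RDD (as the Recommendation Template does); adapt the check to whatever your Preparator actually produces.

import org.apache.predictionio.controller.SanityCheck
import org.apache.spark.rdd.RDD

class PreparedData(
  val ratings: RDD[Rating]
) extends Serializable with SanityCheck {
  // Called right after Preparator's prepare() returns
  override def sanityCheck(): Unit = {
    require(!ratings.take(1).isEmpty, "prepared ratings cannot be empty!")
  }
}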

You may also use this together with the --stop-after-read flag to debug the DataSource:

pio build
pio train --stop-after-read

If your data is empty, you should see the following error thrown by the sanityCheck() function:

[INFO] [CoreWorkflow$] Performing data sanity check on training data.
[INFO] [CoreWorkflow$] org.template.recommendation.TrainingData supports data sanity check. Performing check.
Exception in thread "main" java.lang.IllegalArgumentException: requirement failed: ratings cannot be empty!
    at scala.Predef$.require(Predef.scala:233)
    at org.template.recommendation.TrainingData.sanityCheck(DataSource.scala:73)
    at org.apache.predictionio.workflow.CoreWorkflow$$anonfun$runTypelessContext$7.apply(Workflow.scala:474)
    at org.apache.predictionio.workflow.CoreWorkflow$$anonfun$runTypelessContext$7.apply(Workflow.scala:465)
    at scala.collection.immutable.Map$Map1.foreach(Map.scala:109)
  ...

You can specify the --skip-sanity-check option to turn off sanityCheck:

pio train --stop-after-read --skip-sanity-check

You should see that the check is skipped, as in the following output:

[INFO] [CoreWorkflow$] Data sanity checking is off.
[INFO] [CoreWorkflow$] Data Source
...
[INFO] [CoreWorkflow$] Training interrupted by org.apache.predictionio.workflow.StopAfterReadInterruption.

Engine Status Page

After running pio deploy, you can access the engine status page by pointing your browser at the same URL and port as the deployed engine, which is "http://localhost:8000" by default. On the engine status page, you can find the engine information and the parameters of each DASE component. In particular, you can also see the "Model" trained by the algorithm, depending on how the toString() method is implemented in the Algorithm's Model class.
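For instance, here is a minimal sketch of a Model class overriding toString() so the status page shows a readable summary; the ALS-style feature RDDs are illustrative assumptions, not a required structure.

import org.apache.spark.rdd.RDD

class Model(
  val rank: Int,
  val userFeatures: RDD[(Int, Array[Double])],
  val productFeatures: RDD[(Int, Array[Double])]
) extends Serializable {
  // The engine status page renders this string for the trained model
  override def toString =
    s"userFeatures: [${userFeatures.count()}] productFeatures: [${productFeatures.count()}]"
}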

pio-shell

Apache PredictionIO (incubating) also provides pio-shell, in which you can easily access the Apache PredictionIO (incubating) API, the Spark context and the Spark API for quick code testing or debugging.

To bring up the shell, simply run:

$ pio-shell --with-spark

(pio-shell is available inside the bin/ directory of your Apache PredictionIO (incubating) installation; you should be able to access it if you have added PredictionIO/bin to your PATH environment variable.)

Note that the Spark context is available as variable sc inside the shell.

For example, to get the events of MyApp1 using the PEventStore API inside pio-shell and collect them into an array c, run the following in the shell:

> import org.apache.predictionio.data.store.PEventStore
> val eventsRDD = PEventStore.find(appName="MyApp1")(sc)
> val c = eventsRDD.collect()

Then you should see the following returned in the shell:

...
15/05/18 14:24:42 INFO DAGScheduler: Job 0 finished: collect at <console>:24, took 1.850779 s
c: Array[org.apache.predictionio.data.storage.Event] = Array(Event(id=Some(AaQUUBsFZxteRpDV_7fDGQAAAU1ZfRW1tX9LSWdZSb0),event=$set,eType=item,eId=i42,tType=None,tId=None,p=DataMap(Map(categories -> JArray(List(JString(c2), JString(c1), JString(c6), JString(c3))))),t=2015-05-15T21:31:19.349Z,tags=List(),pKey=None,ct=2015-05-15T21:31:19.354Z), Event(id=Some(DjvP3Dnci9F4CWmiqoLabQAAAU1ZfROaqdRYO-pZ_no),event=$set,eType=user,eId=u9,tType=None,tId=None,p=DataMap(Map()),t=2015-05-15T21:31:18.810Z,tags=List(),pKey=None,ct=2015-05-15T21:31:18.817Z), Event(id=Some(DjvP3Dnci9F4CWmiqoLabQAAAU1ZfRq7tsanlemwmZQ),event=view,eType=user,eId=u9,tType=Some(item),tId=Some(i25),p=DataMap(Map()),t=2015-05-15T21:31:20.635Z,tags=List(),pKey=None,ct=2015-05-15T21:31:20.639Z), Event(id=Some(DjvP3Dnci9F4CWmiqoLabQAAAU1ZfR...
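Since eventsRDD is a regular Spark RDD, you can keep transforming it in the shell before collecting; for example, a quick (illustrative) count of "view" events:

> val viewCount = eventsRDD.filter(_.event == "view").count()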
Machine Learning Analytics with IPython Notebook

IPython Notebook is a very powerful interactive computational environment, and with Apache PredictionIO (incubating), PySpark and Spark SQL, you can easily analyze your collected events when you are developing or tuning your engine.

Prerequisites

Before you begin, please make sure you have the latest stable IPython installed, and that the command ipython can be accessed from your shell's search path.

Export Events to Apache Parquet

PredictionIO supports exporting your events to Apache Parquet, a columnar storage format that allows you to query the data quickly.

Let's export the data we imported in Recommendation Engine Template Quick Start, and assume the App ID is 1.

$ $PIO_HOME/bin/pio export --appid 1 --output /tmp/movies --format parquet

After the command has finished successfully, you should see something similar to the following.

root
 |-- creationTime: string (nullable = true)
 |-- entityId: string (nullable = true)
 |-- entityType: string (nullable = true)
 |-- event: string (nullable = true)
 |-- eventId: string (nullable = true)
 |-- eventTime: string (nullable = true)
 |-- properties: struct (nullable = true)
 |    |-- rating: double (nullable = true)
 |-- targetEntityId: string (nullable = true)
 |-- targetEntityType: string (nullable = true)

Preparing IPython Notebook

Launch IPython Notebook with PySpark using the following command, with $SPARK_HOME replaced by the location of Apache Spark.

$ PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook --pylab inline" $SPARK_HOME/bin/pyspark

By default, you should be able to access your IPython Notebook via web browser at http://localhost:8888.

Let's initialize our notebook by running the following code in the first cell.

import pandas as pd
def rows_to_df(rows):
    return pd.DataFrame(map(lambda e: e.asDict(), rows))
from pyspark.sql import SQLContext
sqlc = SQLContext(sc)
rdd = sqlc.parquetFile("/tmp/movies")
rdd.registerTempTable("events")

Initialization for IPython Notebook

rows_to_df(rows) will come in handy when we want to dump the results from Spark SQL using IPython Notebook's native table rendering.

Performing Analysis with Spark SQL

If all steps above ran successfully, you should have a ready-to-use analytics environment by now. Let's try a few examples to see if everything is functional.

In the second cell, put in this piece of code and run it.

summary = sqlc.sql("SELECT "
                   "entityType, event, targetEntityType, COUNT(*) AS c "
                   "FROM events "
                   "GROUP BY entityType, event, targetEntityType").collect()
rows_to_df(summary)

You should see the following screen.

Summary of Events

We can also plot our data in the next two cells.

import matplotlib.pyplot as plt
count = map(lambda e: e.c, summary)
event = map(lambda e: "%s (%d)" % (e.event, e.c), summary)
colors = ['gold', 'lightskyblue']
plt.pie(count, labels=event, colors=colors, startangle=90, autopct="%1.1f%%")
plt.axis('equal')
plt.show()

Summary in Pie Chart

ratings = sqlc.sql("SELECT properties.rating AS r, COUNT(*) AS c "
                   "FROM events "
                   "WHERE properties.rating IS NOT NULL "
                   "GROUP BY properties.rating "
                   "ORDER BY r").collect()
count = map(lambda e: e.c, ratings)
rating = map(lambda e: "%s (%d)" % (e.r, e.c), ratings)
colors = ['yellowgreen', 'plum', 'gold', 'lightskyblue', 'lightcoral']
plt.pie(count, labels=rating, colors=colors, startangle=90,
        autopct="%1.1f%%")
plt.axis('equal')
plt.show()

Breakdown of Ratings

Happy analyzing!

Machine Learning Analytics with Tableau

With Spark SQL, it is possible to connect Tableau to Apache PredictionIO (incubating) Event Server for interactive analysis of event data.

Prerequisites

In this article, we will assume that you have a working HDFS, and that your environment variable HADOOP_HOME has been properly set. This is essential for Apache Hive to function properly. In addition, HADOOP_CONF_DIR in $PIO_HOME/conf/pio-env.sh must also be properly set for the pio export command to write to HDFS instead of the local filesystem.

Export Events to Apache Parquet

PredictionIO supports exporting your events to Apache Parquet, a columnar storage format that allows you to query the data quickly.

Let's export the data we imported in Recommendation Engine Template Quick Start, and assume the App ID is 1.

$ $PIO_HOME/bin/pio export --appid 1 --output /tmp/movies --format parquet

After the command has finished successfully, you should see something similar to the following.

root
 |-- creationTime: string (nullable = true)
 |-- entityId: string (nullable = true)
 |-- entityType: string (nullable = true)
 |-- event: string (nullable = true)
 |-- eventId: string (nullable = true)
 |-- eventTime: string (nullable = true)
 |-- properties: struct (nullable = true)
 |    |-- rating: double (nullable = true)
 |-- targetEntityId: string (nullable = true)
 |-- targetEntityType: string (nullable = true)

Creating Hive Tables

Before you can use Spark SQL's Thrift JDBC/ODBC Server, you will need to create the table schema in Hive first. Please make sure to replace path_of_hive with the real path.

$ cd path_of_hive
$ bin/hive
hive> CREATE EXTERNAL TABLE events (event STRING, entityType STRING, entityId STRING, targetEntityType STRING, targetEntityId STRING, properties STRUCT<rating:DOUBLE>) STORED AS parquet LOCATION '/tmp/movies';
hive> exit;

Launch Spark SQL's Thrift JDBC/ODBC Server

Once you have created your Hive tables, create a Hive configuration in your Spark installation. If you have a custom hive-site.xml, simply copy or link it to $SPARK_HOME/conf. Otherwise, Hive will have created a local Derby database, and you will need to let Spark know about it. Create $SPARK_HOME/conf/hive-site.xml from scratch with the following template.

You must change /opt/apache-hive-0.13.1-bin below to a real Hive path.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:derby:;databaseName=/opt/apache-hive-0.13.1-bin/metastore_db;create=true</value>
  </property>
</configuration>

Launch Spark SQL's Thrift JDBC/ODBC Server by running:

$ $SPARK_HOME/sbin/start-thriftserver.sh

You can test the server using the included Beeline client.

$ $SPARK_HOME/bin/beeline
beeline> !connect jdbc:hive2://localhost:10000
(Use empty username and password when prompted)
0: jdbc:hive2://localhost:10000> select * from events limit 10;
+--------+-------------+-----------+-------------------+-----------------+------------------+
| event  | entitytype  | entityid  | targetentitytype  | targetentityid  |    properties    |
+--------+-------------+-----------+-------------------+-----------------+------------------+
| buy    | user        | 3         | item              | 0               | {"rating":null}  |
| buy    | user        | 3         | item              | 1               | {"rating":null}  |
| rate   | user        | 3         | item              | 2               | {"rating":1.0}   |
| buy    | user        | 3         | item              | 7               | {"rating":null}  |
| buy    | user        | 3         | item              | 8               | {"rating":null}  |
| buy    | user        | 3         | item              | 9               | {"rating":null}  |
| rate   | user        | 3         | item              | 14              | {"rating":1.0}   |
| buy    | user        | 3         | item              | 15              | {"rating":null}  |
| buy    | user        | 3         | item              | 16              | {"rating":null}  |
| buy    | user        | 3         | item              | 18              | {"rating":null}  |
+--------+-------------+-----------+-------------------+-----------------+------------------+
10 rows selected (0.515 seconds)
0: jdbc:hive2://localhost:10000>

Now you are ready to use Tableau!

Performing Analysis with Tableau

Launch Tableau and Connect to Data. Click on Spark SQL (Beta) and enter Spark SQL's Thrift JDBC/ODBC Server information. Make sure to pick User Name as Authentication. Click Connect.

Tableau and Spark SQL

On the next page, pick default under Schema.

You may not see any choices when you click on Schema. Simply press Enter and Tableau will try to list all schemas.

Once you see a list of tables that includes events, click New Custom SQL, then enter the following.

SELECT event, entityType, entityId, targetEntityType, targetEntityId, properties.rating FROM events

Click Update Now. You should now see the following screen, indicating that the data loaded successfully. Using custom SQL allows you to extract arbitrary fields from within properties.

Setting up Tableau

Click Go to Worksheet and start analyzing. The following shows an example of breaking down different rating values.

Rating Values Breakdown

The following shows a summary of interactions.

Interactions

Happy analyzing!
