mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From build...@apache.org
Subject svn commit: r1006115 - in /websites/staging/mahout/trunk/content: ./ users/algorithms/d-als.html users/algorithms/d-qr.html users/algorithms/d-spca.html
Date Thu, 02 Feb 2017 23:36:47 GMT
Author: buildbot
Date: Thu Feb  2 23:36:47 2017
New Revision: 1006115

Log:
Staging update by buildbot for mahout

Added:
    websites/staging/mahout/trunk/content/users/algorithms/d-als.html
    websites/staging/mahout/trunk/content/users/algorithms/d-spca.html
Modified:
    websites/staging/mahout/trunk/content/   (props changed)
    websites/staging/mahout/trunk/content/users/algorithms/d-qr.html

Propchange: websites/staging/mahout/trunk/content/
------------------------------------------------------------------------------
--- cms:source-revision (original)
+++ cms:source-revision Thu Feb  2 23:36:47 2017
@@ -1 +1 @@
-1781461
+1781487

Added: websites/staging/mahout/trunk/content/users/algorithms/d-als.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/algorithms/d-als.html (added)
+++ websites/staging/mahout/trunk/content/users/algorithms/d-als.html Thu Feb  2 23:36:47 2017
@@ -0,0 +1,350 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="https://mahout.apache.org/images/favicon.ico">
+  <!--<script type="text/javascript" src="/js/prototype.js"></script>-->
+  <script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.2.0/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    tex2jax: {
+      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
+    }
+  });
+  MathJax.Hub.Queue(function() {
+    var all = MathJax.Hub.getAllJax(), i;
+    for(i = 0; i < all.length; i += 1) {
+      all[i].SourceElement().parentNode.className += ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    var mathjax = document.createElement('script'); 
+    mathjax.type = 'text/javascript'; 
+    mathjax.async = true;
+
+    mathjax.src = ('https:' == document.location.protocol) ?
+        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' : 
+        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+	
+	  var s = document.getElementsByTagName('script')[0]; 
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/"><img src="/images/mahout-logo-brudman.png" alt="Logos for Mahout and Apache Software Foundation" /></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+ 
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+             <!-- <li><a href="/">Home</a></li> --> 
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a>
+                  <li><a href="/general/who-we-are.html">Who we are</a>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
+                  <li><a href="/general/release-notes.html">Release Notes</a> 
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
+                  <li><a href="/general/professional-support.html">Professional Support</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a>
+                  <li><a href="/general/faq.html">FAQ</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout-Samsara<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+		  <li><a href="/users/flinkbindings/playing-with-samsara-flink.html">Flink Bindings Overview</a></li>
+                  <li class="nav-header">Engines</li>
+                  <li><a href="/users/sparkbindings/home.html">Spark</a></li>
+                  <li><a href="/users/environment/h2o-internals.html">H2O</a></li>
+                  <li><a href="/users/flinkbindings/flink-internals.html">Flink</a></li>
+                  <li class="nav-header">References</li>
+                  <li><a href="/users/environment/in-core-reference.html">In-Core Algebraic DSL Reference</a></li>
+                  <li><a href="/users/environment/out-of-core-reference.html">Distributed Algebraic DSL Reference</a></li>
+                  <li class="nav-header">Tutorials</li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+                  <li><a href="/users/environment/how-to-build-an-app.html">How to build an app</a></li>
+                  <li><a href="/users/environment/classify-a-doc-from-the-shell.html">Building a text classifier in Mahout's Spark Shell</a></li>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li class="nav-header">Distributed Matrix Decomposition</li>
+                  <li><a href="/users/algorithms/d-qr.html">Cholesky QR</a></li>
+                  <li><a href="/users/algorithms/d-ssvd.html">SSVD</a></li>
+                  <li><a href="/users/sparkbindings/home.html">Distributed ALS</a></li>
+                  <li><a href="/users/sparkbindings/home.html">SPCA</a></li>
+                  <li class="nav-header">Recommendations</li>
+                  <li><a href="/users/algorithms/recommender-overview.html">Recommender Overview</a></li>
+                  <li><a href="/users/algorithms/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+                  <li class="nav-header">Classification</li>
+                  <li><a href="/users/algorithms/spark-naive-bayes.html">Spark Naive Bayes</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">MapReduce Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li><a href="/users/basics/quickstart.html">Overview</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
+                  <li><a href="/users/basics/collocations.html">Collocations</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li class="nav-header">Classification</li>
+                  <li><a href="/users/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/classification/logistic-regression.html">Logistic Regression (Single Machine)</a></li>
+                  <li><a href="/users/classification/partial-implementation.html">Random Forest</a></li>
+                  <li class="nav-header">Classification Examples</li>
+                  <li><a href="/users/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                  <li><a href="/users/classification/bankmarketing-example.html">SGD classifier bank marketing</a></li>
+                  <li><a href="/users/classification/wikipedia-classifier-example.html">Wikipedia XML parser and classifier</a></li>
+                  <li class="nav-header">Clustering</li>
+                  <li><a href="/users/clustering/k-means-clustering.html">k-Means</a></li>
+                  <li><a href="/users/clustering/canopy-clustering.html">Canopy</a></li>
+                  <li><a href="/users/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                  <li><a href="/users/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                  <li><a href="/users/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                  <li class="nav-header">Clustering Commandline usage</li>
+                  <li><a href="/users/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                  <li><a href="/users/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                  <li><a href="/users/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                  <li class="nav-header">Clustering Examples</li>
+                  <li><a href="/users/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                  <li class="nav-header">Cluster Post processing</li>
+                  <li><a href="/users/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                  <li><a href="/users/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                  <li class="nav-header">Recommendations</li>
+                  <li><a href="/users/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                  <li><a href="/users/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		  <li><a href="/users/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                  <li><a href="/users/recommender/recommender-documentation.html">Overview</a></li>
+                  <li><a href="/users/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                  <li><a href="/users/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+               </ul>
+              </li>
+              <!--  <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                
+                </ul>
+            </li> -->
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Apache Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Apache Hadoop</a></li>
+      <li><a href="http://bigtop.apache.org/">Apache Bigtop</a></li>
+      <li><a href="http://spark.apache.org/">Apache Spark</a></li>
+	  <li><a href="http://flink.apache.org/">Apache Flink</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <style type="text/css">
+/* The following code is added by mdx_elementid.py
+   It was originally lifted from http://subversion.apache.org/style/site.css */
+/*
+ * Hide class="elementid-permalink", except when an enclosing heading
+ * has the :hover property.
+ */
+.headerlink, .elementid-permalink {
+  visibility: hidden;
+}
+h2:hover > .headerlink, h3:hover > .headerlink, h1:hover > .headerlink, h6:hover > .headerlink, h4:hover > .headerlink, h5:hover > .headerlink, dt:hover > .elementid-permalink { visibility: visible }</style>
+<h1 id="distributed-cholesky-qr">Distributed Cholesky QR<a class="headerlink" href="#distributed-cholesky-qr" title="Permanent link">&para;</a></h1>
+<h2 id="intro">Intro<a class="headerlink" href="#intro" title="Permanent link">&para;</a></h2>
+<p>Mahout has a distributed implementation of QR decomposition for tall thin matrices[1].</p>
+<h2 id="algorithm">Algorithm<a class="headerlink" href="#algorithm" title="Permanent link">&para;</a></h2>
+<p>For the classic QR decomposition of the form <code>\(\mathbf{A}=\mathbf{QR},\mathbf{A}\in\mathbb{R}^{m\times n}\)</code> a distributed version is fairly easily achieved if <code>\(\mathbf{A}\)</code> is tall and thin such that <code>\(\mathbf{A}^{\top}\mathbf{A}\)</code> fits in memory, i.e. <em>m</em> is large but <em>n</em> &lt; ~5000. Under such circumstances, only <code>\(\mathbf{A}\)</code> and <code>\(\mathbf{Q}\)</code> are distributed matrices and <code>\(\mathbf{A^{\top}A}\)</code> and <code>\(\mathbf{R}\)</code> are in-core products. We just compute the in-core version of the Cholesky decomposition in the form of <code>\(\mathbf{LL}^{\top}= \mathbf{A}^{\top}\mathbf{A}\)</code>.  After that we take <code>\(\mathbf{R}= \mathbf{L}^{\top}\)</code> and <code>\(\mathbf{Q}=\mathbf{A}\left(\mathbf{L}^{\top}\right)^{-1}\)</code>.  The latter is easily achieved by multiplying each vertical block of <code>\(\mathbf{A}\)</code> by <code>\(\left(\mathbf{L}^{\top}\right)^{-1}\)</code
 >.  (There is no actual matrix inversion happening). </p>
+<h2 id="implementation">Implementation<a class="headerlink" href="#implementation" title="Permanent link">&para;</a></h2>
+<p>Mahout <code>dqrThin(...)</code> is implemented in the Mahout <code>math-scala</code> algebraic optimizer which translates Mahout's R-like linear algebra operators into a physical plan for both Spark and H2O distributed engines.</p>
+<div class="codehilite"><pre><span class="n">def</span> <span class="n">dqrThin</span><span class="p">[</span><span class="n">K</span><span class="p">:</span> <span class="n">ClassTag</span><span class="p">](</span><span class="n">A</span><span class="p">:</span> <span class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span class="p">],</span> <span class="n">checkRankDeficiency</span><span class="p">:</span> <span class="n">Boolean</span> <span class="p">=</span> <span class="n">true</span><span class="p">):</span> <span class="p">(</span><span class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span class="p">],</span> <span class="n">Matrix</span><span class="p">)</span> <span class="p">=</span> <span class="p">{</span>        
+    <span class="k">if</span> <span class="p">(</span><span class="n">drmA</span><span class="p">.</span><span class="n">ncol</span> <span class="o">&gt;</span> 5000<span class="p">)</span>
+        <span class="nb">log</span><span class="p">.</span><span class="n">warn</span><span class="p">(</span>&quot;<span class="n">A</span> <span class="n">is</span> <span class="n">too</span> <span class="n">fat</span><span class="p">.</span> <span class="n">A</span><span class="o">&#39;</span><span class="n">A</span> <span class="n">must</span> <span class="n">fit</span> <span class="n">in</span> <span class="n">memory</span> <span class="n">and</span> <span class="n">easily</span> <span class="n">broadcasted</span><span class="p">.</span>&quot;<span class="p">)</span>
+    <span class="n">implicit</span> <span class="n">val</span> <span class="n">ctx</span> <span class="p">=</span> <span class="n">drmA</span><span class="p">.</span><span class="n">context</span>
+    <span class="n">val</span> <span class="n">AtA</span> <span class="p">=</span> <span class="p">(</span><span class="n">drmA</span><span class="p">.</span><span class="n">t</span> <span class="c">%*% drmA).checkpoint()</span>
+    <span class="n">val</span> <span class="n">inCoreAtA</span> <span class="p">=</span> <span class="n">AtA</span><span class="p">.</span><span class="n">collect</span>
+    <span class="n">val</span> <span class="n">ch</span> <span class="p">=</span> <span class="n">chol</span><span class="p">(</span><span class="n">inCoreAtA</span><span class="p">)</span>
+    <span class="n">val</span> <span class="n">inCoreR</span> <span class="p">=</span> <span class="p">(</span><span class="n">ch</span><span class="p">.</span><span class="n">getL</span> <span class="n">cloned</span><span class="p">)</span> <span class="n">t</span>
+    <span class="k">if</span> <span class="p">(</span><span class="n">checkRankDeficiency</span> <span class="o">&amp;&amp;</span> !<span class="n">ch</span><span class="p">.</span><span class="n">isPositiveDefinite</span><span class="p">)</span>
+        <span class="n">throw</span> <span class="n">new</span> <span class="n">IllegalArgumentException</span><span class="p">(</span>&quot;<span class="n">R</span> <span class="n">is</span> <span class="n">rank</span><span class="o">-</span><span class="n">deficient</span><span class="p">.</span>&quot;<span class="p">)</span>
+    <span class="n">val</span> <span class="n">bcastAtA</span> <span class="p">=</span> <span class="n">sc</span><span class="p">.</span><span class="n">broadcast</span><span class="p">(</span><span class="n">inCoreAtA</span><span class="p">)</span>
+    <span class="n">val</span> <span class="n">Q</span> <span class="p">=</span> <span class="n">A</span><span class="p">.</span><span class="n">mapBlock</span><span class="p">()</span> <span class="p">{</span>
+        <span class="k">case</span> <span class="p">(</span><span class="n">keys</span><span class="p">,</span> <span class="n">block</span><span class="p">)</span> <span class="p">=</span><span class="o">&gt;</span> <span class="n">keys</span> <span class="o">-&gt;</span> <span class="n">chol</span><span class="p">(</span><span class="n">bcastAtA</span><span class="p">).</span><span class="n">solveRight</span><span class="p">(</span><span class="n">block</span><span class="p">)</span>
+    <span class="p">}</span>
+    <span class="n">Q</span> <span class="o">-&gt;</span> <span class="n">inCoreR</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<h2 id="usage">Usage<a class="headerlink" href="#usage" title="Permanent link">&para;</a></h2>
+<p>The Scala <code>dqrThin(...)</code> method can easily be called in any Spark or H2O application built with the <code>math-scala</code> library and the corresponding <code>Spark</code> or <code>H2O</code> engine module as follows:</p>
+<div class="codehilite"><pre><span class="n">import</span> <span class="n">org</span><span class="p">.</span><span class="n">apache</span><span class="p">.</span><span class="n">mahout</span><span class="p">.</span><span class="n">math</span><span class="p">.</span><span class="n">_</span>
+<span class="n">import</span> <span class="n">decompositions</span><span class="p">.</span><span class="n">_</span>
+<span class="n">import</span> <span class="n">drm</span><span class="p">.</span><span class="n">_</span>
+
+<span class="n">val</span><span class="p">(</span><span class="n">drmQ</span><span class="p">,</span> <span class="n">inCoreR</span><span class="p">)</span> <span class="p">=</span> <span class="n">dqrThin</span><span class="p">(</span><span class="n">drma</span><span class="p">)</span>
+</pre></div>
+
+
+<h2 id="references">References<a class="headerlink" href="#references" title="Permanent link">&para;</a></h2>
+<p>[1]: <a href="http://mahout.apache.org/users/sparkbindings/ScalaSparkBindings.pdf">Mahout Scala and Mahout Spark Bindings for Linear Algebra Subroutines</a></p>
+<p>[2]: <a href="http://mahout.apache.org/users/sparkbindings/home.html">Mahout Spark and Scala Bindings</a></p>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014-2016 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+		  Apache Mahout, Mahout, Apache, the Apache feather logo, and the elephant rider logo are either registered trademarks or trademarks of <a href="http://www.apache.org/foundation/marks/">The Apache Software Foundation</a> in the United States and other countries.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() {
+      var cx = '012254517474945470291:vhsfv7eokdc';
+      var gcse = document.createElement('script');
+      gcse.type = 'text/javascript';
+      gcse.async = true;
+      gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
+          '//www.google.com/cse/cse.js?cx=' + cx;
+      var s = document.getElementsByTagName('script')[0];
+      s.parentNode.insertBefore(gcse, s);
+    })();
+  </script>
+</body>
+</html>

Modified: websites/staging/mahout/trunk/content/users/algorithms/d-qr.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/algorithms/d-qr.html (original)
+++ websites/staging/mahout/trunk/content/users/algorithms/d-qr.html Thu Feb  2 23:36:47 2017
@@ -281,9 +281,9 @@
 h2:hover > .headerlink, h3:hover > .headerlink, h1:hover > .headerlink, h6:hover > .headerlink, h4:hover > .headerlink, h5:hover > .headerlink, dt:hover > .elementid-permalink { visibility: visible }</style>
 <h1 id="distributed-cholesky-qr">Distributed Cholesky QR<a class="headerlink" href="#distributed-cholesky-qr" title="Permanent link">&para;</a></h1>
 <h2 id="intro">Intro<a class="headerlink" href="#intro" title="Permanent link">&para;</a></h2>
-<p>Mahout has a distributed implementation of QR decomposition for tall thin matricies[1].</p>
+<p>Mahout has a distributed implementation of QR decomposition for tall thin matrices[1].</p>
 <h2 id="algorithm">Algorithm<a class="headerlink" href="#algorithm" title="Permanent link">&para;</a></h2>
-<p>For the classic QR decomposition of the form <code>\(\mathbf{A}=\mathbf{QR},\mathbf{A}\in\mathbb{R}^{m\times n}\)</code> a distributed version is fairly easily achieved if <code>\(\mathbf{A}\)</code> is tall and thin such that <code>\(\mathbf{A}^{\top}\mathbf{A}\)</code> fits in memory, i.e. <em>m</em> is large but <em>n</em> &lt; ~5000 Under such circumstances, only <code>\(\mathbf{A}\)</code> and <code>\(\mathbf{Q}\)</code> are distributed matricies and <code>\(\mathbf{A^{\top}A}\)</code> and <code>\(\mathbf{R}\)</code> are in-core products. We just compute the in-core version of the Cholesky decomposition in the form of <code>\(\mathbf{LL}^{\top}= \mathbf{A}^{\top}\mathbf{A}\)</code>.  After that we take <code>\(\mathbf{R}= \mathbf{L}^{\top}\)</code> and <code>\(\mathbf{Q}=\mathbf{A}\left(\mathbf{L}^{\top}\right)^{-1}\)</code>.  The latter is easily achieved by multiplying each verticle block of <code>\(\mathbf{A}\)</code> by <code>\(\left(\mathbf{L}^{\top}\right)^{-1}\)</code
 >.  (There is no actual matrix inversion happening). </p>
+<p>For the classic QR decomposition of the form <code>\(\mathbf{A}=\mathbf{QR},\mathbf{A}\in\mathbb{R}^{m\times n}\)</code> a distributed version is fairly easily achieved if <code>\(\mathbf{A}\)</code> is tall and thin such that <code>\(\mathbf{A}^{\top}\mathbf{A}\)</code> fits in memory, i.e. <em>m</em> is large but <em>n</em> &lt; ~5000. Under such circumstances, only <code>\(\mathbf{A}\)</code> and <code>\(\mathbf{Q}\)</code> are distributed matrices and <code>\(\mathbf{A^{\top}A}\)</code> and <code>\(\mathbf{R}\)</code> are in-core products. We just compute the in-core version of the Cholesky decomposition in the form of <code>\(\mathbf{LL}^{\top}= \mathbf{A}^{\top}\mathbf{A}\)</code>.  After that we take <code>\(\mathbf{R}= \mathbf{L}^{\top}\)</code> and <code>\(\mathbf{Q}=\mathbf{A}\left(\mathbf{L}^{\top}\right)^{-1}\)</code>.  The latter is easily achieved by multiplying each vertical block of <code>\(\mathbf{A}\)</code> by <code>\(\left(\mathbf{L}^{\top}\right)^{-1}\)</code>
 .  (There is no actual matrix inversion happening). </p>
 <h2 id="implementation">Implementation<a class="headerlink" href="#implementation" title="Permanent link">&para;</a></h2>
 <p>Mahout <code>dqrThin(...)</code> is implemented in the mahout <code>math-scala</code> algebraic optimizer which translates Mahout's R-like linear algebra operators into a physical plan for both Spark and H2O distributed engines.</p>
 <div class="codehilite"><pre><span class="n">def</span> <span class="n">dqrThin</span><span class="p">[</span><span class="n">K</span><span class="p">:</span> <span class="n">ClassTag</span><span class="p">](</span><span class="n">A</span><span class="p">:</span> <span class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span class="p">],</span> <span class="n">checkRankDeficiency</span><span class="p">:</span> <span class="n">Boolean</span> <span class="p">=</span> <span class="n">true</span><span class="p">):</span> <span class="p">(</span><span class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span class="p">],</span> <span class="n">Matrix</span><span class="p">)</span> <span class="p">=</span> <span class="p">{</span>        

Added: websites/staging/mahout/trunk/content/users/algorithms/d-spca.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/algorithms/d-spca.html (added)
+++ websites/staging/mahout/trunk/content/users/algorithms/d-spca.html Thu Feb  2 23:36:47 2017
@@ -0,0 +1,350 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="https://mahout.apache.org/images/favicon.ico">
+  <!--<script type="text/javascript" src="/js/prototype.js"></script>-->
+  <script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.2.0/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    tex2jax: {
+      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
+    }
+  });
+  MathJax.Hub.Queue(function() {
+    var all = MathJax.Hub.getAllJax(), i;
+    for(i = 0; i < all.length; i += 1) {
+      all[i].SourceElement().parentNode.className += ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    var mathjax = document.createElement('script'); 
+    mathjax.type = 'text/javascript'; 
+    mathjax.async = true;
+
+    mathjax.src = ('https:' == document.location.protocol) ?
+        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' : 
+        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+	
+	  var s = document.getElementsByTagName('script')[0]; 
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/"><img src="/images/mahout-logo-brudman.png" alt="Logos for Mahout and Apache Software Foundation" /></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+ 
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+             <!-- <li><a href="/">Home</a></li> --> 
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a>
+                  <li><a href="/general/who-we-are.html">Who we are</a>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
+                  <li><a href="/general/release-notes.html">Release Notes</a> 
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
+                  <li><a href="/general/professional-support.html">Professional Support</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a>
+                  <li><a href="/general/faq.html">FAQ</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout-Samsara<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+		  <li><a href="/users/flinkbindings/playing-with-samsara-flink.html">Flink Bindings Overview</a></li>
+                  <li class="nav-header">Engines</li>
+                  <li><a href="/users/sparkbindings/home.html">Spark</a></li>
+                  <li><a href="/users/environment/h2o-internals.html">H2O</a></li>
+                  <li><a href="/users/flinkbindings/flink-internals.html">Flink</a></li>
+                  <li class="nav-header">References</li>
+                  <li><a href="/users/environment/in-core-reference.html">In-Core Algebraic DSL Reference</a></li>
+                  <li><a href="/users/environment/out-of-core-reference.html">Distributed Algebraic DSL Reference</a></li>
+                  <li class="nav-header">Tutorials</li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+                  <li><a href="/users/environment/how-to-build-an-app.html">How to build an app</a></li>
+                  <li><a href="/users/environment/classify-a-doc-from-the-shell.html">Building a text classifier in Mahout's Spark Shell</a></li>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Algorithms<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li class="nav-header">Distributed Matrix Decomposition</li>
+                  <li><a href="/users/algorithms/d-qr.html">Cholesky QR</a></li>
+                  <li><a href="/users/algorithms/d-ssvd.html">SSVD</a></li>
+                  <li><a href="/users/sparkbindings/home.html">Distributed ALS</a></li>
+                  <li><a href="/users/sparkbindings/home.html">SPCA</a></li>
+                  <li class="nav-header">Recommendations</li>
+                  <li><a href="/users/algorithms/recommender-overview.html">Recommender Overview</a></li>
+                  <li><a href="/users/algorithms/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+                  <li class="nav-header">Classification</li>
+                  <li><a href="/users/algorithms/spark-naive-bayes.html">Spark Naive Bayes</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">MapReduce Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li><a href="/users/basics/quickstart.html">Overview</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
+                  <li><a href="/users/basics/collocations.html">Collocations</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li class="nav-header">Classification</li>
+                  <li><a href="/users/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/classification/logistic-regression.html">Logistic Regression (Single Machine)</a></li>
+                  <li><a href="/users/classification/partial-implementation.html">Random Forest</a></li>
+                  <li class="nav-header">Classification Examples</li>
+                  <li><a href="/users/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                  <li><a href="/users/classification/bankmarketing-example.html">SGD classifier bank marketing</a></li>
+                  <li><a href="/users/classification/wikipedia-classifier-example.html">Wikipedia XML parser and classifier</a></li>
+                  <li class="nav-header">Clustering</li>
+                  <li><a href="/users/clustering/k-means-clustering.html">k-Means</a></li>
+                  <li><a href="/users/clustering/canopy-clustering.html">Canopy</a></li>
+                  <li><a href="/users/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                  <li><a href="/users/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                  <li><a href="/users/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                  <li class="nav-header">Clustering Commandline usage</li>
+                  <li><a href="/users/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                  <li><a href="/users/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                  <li><a href="/users/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                  <li class="nav-header">Clustering Examples</li>
+                  <li><a href="/users/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                  <li class="nav-header">Cluster Post processing</li>
+                  <li><a href="/users/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                  <li><a href="/users/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                  <li class="nav-header">Recommendations</li>
+                  <li><a href="/users/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                  <li><a href="/users/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		  <li><a href="/users/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                  <li><a href="/users/recommender/recommender-documentation.html">Overview</a></li>
+                  <li><a href="/users/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                  <li><a href="/users/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+               </ul>
+              </li>
+              <!--  <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                
+                </ul> -->
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Apache Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Apache Hadoop</a></li>
+      <li><a href="http://bigtop.apache.org/">Apache Bigtop</a></li>
+      <li><a href="http://spark.apache.org/">Apache Spark</a></li>
+	  <li><a href="http://flink.apache.org/">Apache Flink</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <style type="text/css">
+/* The following code is added by mdx_elementid.py
+   It was originally lifted from http://subversion.apache.org/style/site.css */
+/*
+ * Hide class="elementid-permalink", except when an enclosing heading
+ * has the :hover property.
+ */
+.headerlink, .elementid-permalink {
+  visibility: hidden;
+}
+h2:hover > .headerlink, h3:hover > .headerlink, h1:hover > .headerlink, h6:hover > .headerlink, h4:hover > .headerlink, h5:hover > .headerlink, dt:hover > .elementid-permalink { visibility: visible }</style>
+<h1 id="distributed-stochastic-pca">Distributed Stochastic PCA<a class="headerlink" href="#distributed-stochastic-pca" title="Permanent link">&para;</a></h1>
+<h2 id="intro">Intro<a class="headerlink" href="#intro" title="Permanent link">&para;</a></h2>
+<p>Mahout has a distributed implementation of Stochastic PCA.</p>
+<h2 id="motivation">Motivation<a class="headerlink" href="#motivation" title="Permanent link">&para;</a></h2>
+<p>The Stochastic SVD method in Mahout produces reduced-rank Singular Value Decomposition output in its strict mathematical definition: <code>\(\mathbf{A}\approx\mathbf{U}\boldsymbol{\Sigma}\mathbf{V}^{\top}\)</code>, i.e. it creates outputs for matrices <code>\(\mathbf{U}\)</code>, <code>\(\mathbf{V}\)</code>, and <code>\(\boldsymbol{\Sigma}\)</code>, each of which may be requested individually. The desired rank of decomposition, henceforth denoted as <em>k</em><code>\(\in\mathbb{N}_1\)</code>, is a parameter of the algorithm. The singular values inside diagonal matrix <code>\(\boldsymbol{\Sigma}\)</code> satisfy <code>\(\sigma_{i+1}\leq\sigma_{i}\ \forall i\in[1,k-1]\)</code>, i.e. sorted from biggest to smallest. Cases of rank deficiency <code>\(\mathrm{rank}(\mathbf{A})&lt;k\)</code> are handled by producing 0s in singular value positions once deficiency takes place.</p>
+<h2 id="implementation">Implementation<a class="headerlink" href="#implementation" title="Permanent link">&para;</a></h2>
+<p>Mahout <code>dqrThin(...)</code> is implemented in the mahout <code>math-scala</code> algebraic optimizer which translates Mahout's R-like linear algebra operators into a physical plan for both Spark and H2O distributed engines.</p>
+<div class="codehilite"><pre><span class="n">def</span> <span class="n">dqrThin</span><span class="p">[</span><span class="n">K</span><span class="p">:</span> <span class="n">ClassTag</span><span class="p">](</span><span class="n">A</span><span class="p">:</span> <span class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span class="p">],</span> <span class="n">checkRankDeficiency</span><span class="p">:</span> <span class="n">Boolean</span> <span class="p">=</span> <span class="n">true</span><span class="p">):</span> <span class="p">(</span><span class="n">DrmLike</span><span class="p">[</span><span class="n">K</span><span class="p">],</span> <span class="n">Matrix</span><span class="p">)</span> <span class="p">=</span> <span class="p">{</span>        
+    <span class="k">if</span> <span class="p">(</span><span class="n">drmA</span><span class="p">.</span><span class="n">ncol</span> <span class="o">&gt;</span> 5000<span class="p">)</span>
+        <span class="nb">log</span><span class="p">.</span><span class="n">warn</span><span class="p">(</span>&quot;<span class="n">A</span> <span class="n">is</span> <span class="n">too</span> <span class="n">fat</span><span class="p">.</span> <span class="n">A</span><span class="o">&#39;</span><span class="n">A</span> <span class="n">must</span> <span class="n">fit</span> <span class="n">in</span> <span class="n">memory</span> <span class="n">and</span> <span class="n">easily</span> <span class="n">broadcasted</span><span class="p">.</span>&quot;<span class="p">)</span>
+    <span class="n">implicit</span> <span class="n">val</span> <span class="n">ctx</span> <span class="p">=</span> <span class="n">drmA</span><span class="p">.</span><span class="n">context</span>
+    <span class="n">val</span> <span class="n">AtA</span> <span class="p">=</span> <span class="p">(</span><span class="n">drmA</span><span class="p">.</span><span class="n">t</span> <span class="c">%*% drmA).checkpoint()</span>
+    <span class="n">val</span> <span class="n">inCoreAtA</span> <span class="p">=</span> <span class="n">AtA</span><span class="p">.</span><span class="n">collect</span>
+    <span class="n">val</span> <span class="n">ch</span> <span class="p">=</span> <span class="n">chol</span><span class="p">(</span><span class="n">inCoreAtA</span><span class="p">)</span>
+    <span class="n">val</span> <span class="n">inCoreR</span> <span class="p">=</span> <span class="p">(</span><span class="n">ch</span><span class="p">.</span><span class="n">getL</span> <span class="n">cloned</span><span class="p">)</span> <span class="n">t</span>
+    <span class="k">if</span> <span class="p">(</span><span class="n">checkRankDeficiency</span> <span class="o">&amp;&amp;</span> !<span class="n">ch</span><span class="p">.</span><span class="n">isPositiveDefinite</span><span class="p">)</span>
+        <span class="n">throw</span> <span class="n">new</span> <span class="n">IllegalArgumentException</span><span class="p">(</span>&quot;<span class="n">R</span> <span class="n">is</span> <span class="n">rank</span><span class="o">-</span><span class="n">deficient</span><span class="p">.</span>&quot;<span class="p">)</span>
+    <span class="n">val</span> <span class="n">bcastAtA</span> <span class="p">=</span> <span class="n">sc</span><span class="p">.</span><span class="n">broadcast</span><span class="p">(</span><span class="n">inCoreAtA</span><span class="p">)</span>
+    <span class="n">val</span> <span class="n">Q</span> <span class="p">=</span> <span class="n">A</span><span class="p">.</span><span class="n">mapBlock</span><span class="p">()</span> <span class="p">{</span>
+        <span class="k">case</span> <span class="p">(</span><span class="n">keys</span><span class="p">,</span> <span class="n">block</span><span class="p">)</span> <span class="p">=</span><span class="o">&gt;</span> <span class="n">keys</span> <span class="o">-&gt;</span> <span class="n">chol</span><span class="p">(</span><span class="n">bcastAtA</span><span class="p">).</span><span class="n">solveRight</span><span class="p">(</span><span class="n">block</span><span class="p">)</span>
+    <span class="p">}</span>
+    <span class="n">Q</span> <span class="o">-&gt;</span> <span class="n">inCoreR</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<h2 id="usage">Usage<a class="headerlink" href="#usage" title="Permanent link">&para;</a></h2>
+<p>The scala <code>dqrThin(...)</code> method can easily be called in any Spark or H2O application built with the <code>math-scala</code> library and the corresponding <code>Spark</code> or <code>H2O</code> engine module as follows:</p>
+<div class="codehilite"><pre><span class="n">import</span> <span class="n">org</span><span class="p">.</span><span class="n">apache</span><span class="p">.</span><span class="n">mahout</span><span class="p">.</span><span class="n">math</span><span class="p">.</span><span class="n">_</span>
+<span class="n">import</span> <span class="n">decompositions</span><span class="p">.</span><span class="n">_</span>
+<span class="n">import</span> <span class="n">drm</span><span class="p">.</span><span class="n">_</span>
+
+<span class="n">val</span><span class="p">(</span><span class="n">drmQ</span><span class="p">,</span> <span class="n">inCoreR</span><span class="p">)</span> <span class="p">=</span> <span class="n">dqrThin</span><span class="p">(</span><span class="n">drma</span><span class="p">)</span>
+</pre></div>
+
+
+<h2 id="references">References<a class="headerlink" href="#references" title="Permanent link">&para;</a></h2>
+<p>[1]: <a href="http://mahout.apache.org/users/sparkbindings/ScalaSparkBindings.pdf">Mahout Scala and Mahout Spark Bindings for Linear Algebra Subroutines</a></p>
+<p>[2]: <a href="http://mahout.apache.org/users/sparkbindings/home.html">Mahout Spark and Scala Bindings</a></p>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014-2016 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+		  Apache Mahout, Mahout, Apache, the Apache feather logo, and the elephant rider logo are either registered trademarks or trademarks of <a href="http://www.apache.org/foundation/marks/">The Apache Software Foundation</a> in the United States and other countries.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() {
+      var cx = '012254517474945470291:vhsfv7eokdc';
+      var gcse = document.createElement('script');
+      gcse.type = 'text/javascript';
+      gcse.async = true;
+      gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
+          '//www.google.com/cse/cse.js?cx=' + cx;
+      var s = document.getElementsByTagName('script')[0];
+      s.parentNode.insertBefore(gcse, s);
+    })();
+  </script>
+</body>
+</html>



Mime
View raw message