mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rawkintr...@apache.org
Subject [05/51] [abbrv] [partial] mahout git commit: WEBSITE Emergency Patches for go-live closes apache/mahout#317
Date Sat, 13 May 2017 08:00:57 GMT
http://git-wip-us.apache.org/repos/asf/mahout/blob/7c0babd7/website/oldsite/_site/users/basics/creating-vectors-from-text.html
----------------------------------------------------------------------
diff --git a/website/oldsite/_site/users/basics/creating-vectors-from-text.html b/website/oldsite/_site/users/basics/creating-vectors-from-text.html
deleted file mode 100644
index d00d942..0000000
--- a/website/oldsite/_site/users/basics/creating-vectors-from-text.html
+++ /dev/null
@@ -1,554 +0,0 @@
-
-
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-
-  <title>Creating Vectors from Text</title>
-  
-  <meta name="author" content="The Apache Software Foundation">
-
-  <!-- Enable responsive viewport -->
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-
-  <!-- Bootstrap styles -->
-  <link href="/assets/themes/mahout3/css/bootstrap.min.css" rel="stylesheet">
-  <!-- Optional theme -->
-  <link href="/assets/themes/mahout3/css/bootstrap-theme.min.css" rel="stylesheet">
-  <!-- Sticky Footer -->
-  <link href="/assets/themes/mahout3/css/bs-sticky-footer.css" rel="stylesheet">
-
-  <!-- Custom styles -->
-  <link href="/assets/themes/mahout3/css/style.css" rel="stylesheet" type="text/css" media="all">
-
-  <!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
-  <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
-  <!--[if lt IE 9]>
-  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
-  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
-  <![endif]-->
-
-  <!-- Fav and touch icons -->
-  <!-- Update these with your own images
-    <link rel="shortcut icon" href="images/favicon.ico">
-    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
-    <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png">
-    <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png">
-  -->
-
-  <!-- atom & rss feed -->
-  <link href="/atom.xml" type="application/atom+xml" rel="alternate" title="Sitewide ATOM Feed">
-  <link href="/rss.xml" type="application/rss+xml" rel="alternate" title="Sitewide RSS Feed">
-  <script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    tex2jax: {
-      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
-    }
-  });
-  MathJax.Hub.Queue(function() {
-    var all = MathJax.Hub.getAllJax(), i;
-    for(i = 0; i < all.length; i += 1) {
-      all[i].SourceElement().parentNode.className += ' has-jax';
-    }
-  });
-  </script>
-  <script type="text/javascript">
-    var mathjax = document.createElement('script');
-    mathjax.type = 'text/javascript';
-    mathjax.async = true;
-
-    mathjax.src = ('https:' == document.location.protocol) ?
-        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' :
-        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
-
-      var s = document.getElementsByTagName('script')[0];
-    s.parentNode.insertBefore(mathjax, s);
-  </script>
-</head>
-
-<nav class="navbar navbar-default navbar-fixed-top">
-  <div class="container-fluid">
-    <!-- Brand and toggle get grouped for better mobile display -->
-    <div class="navbar-header">
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-      <a class="navbar-brand" href="/">
-        <img src="/assets/img/Mahout-logo-82x100.png" height="30" alt="I'm mahout">
-      </a>
-    </div>
-
-    <!--<div class="nav-collapse collapse">-->
-<div class="collapse navbar-collapse" id="main-navbar">
-    <ul class="nav navbar-nav">
-        <!-- <li><a href="/">Home</a></li> -->
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/general/downloads.html">Downloads</a>
-                <li><a href="/general/who-we-are.html">Who we are</a>
-                <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
-                <li><a href="/general/release-notes.html">Release Notes</a>
-                <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
-                <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
-                <li><a href="/general/professional-support.html">Professional Support</a>
-                <li class="divider"></li>
-                <li class="nav-header">Resources</li>
-                <li><a href="/general/reference-reading.html">Reference Reading</a>
-                <li><a href="/general/faq.html">FAQ</a>
-                <li class="divider"></li>
-                <li class="nav-header">Legal</li>
-                <li><a href="http://www.apache.org/licenses/">License</a></li>
-                <li><a href="http://www.apache.org/security/">Security</a></li>
-                <li><a href="/general/privacy-policy.html">Privacy Policy</a>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/developers/developer-resources.html">Developer resources</a></li>
-                <li><a href="/developers/version-control.html">Version control</a></li>
-                <li><a href="/developers/buildingmahout.html">Build from source</a></li>
-                <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
-                <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Contributions</li>
-                <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
-                <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
-                <li><a href="/developers/gsoc.html">GSoC</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">For committers</li>
-                <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
-                <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
-                <li><a href="/developers/github.html">Handling Github PRs</a></li>
-                <li><a href="/developers/how-to-release.html">How to release</a></li>
-                <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout-Samsara<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
-                <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
-                <li><a href="/users/flinkbindings/playing-with-samsara-flink.html">Flink Bindings Overview</a></li>
-                <li class="nav-header">Engines</li>
-                <li><a href="/users/sparkbindings/home.html">Spark</a></li>
-                <li><a href="/users/environment/h2o-internals.html">H2O</a></li>
-                <li><a href="/users/flinkbindings/flink-internals.html">Flink</a></li>
-                <li class="nav-header">References</li>
-                <li><a href="/users/environment/in-core-reference.html">In-Core Algebraic DSL Reference</a></li>
-                <li><a href="/users/environment/out-of-core-reference.html">Distributed Algebraic DSL Reference</a></li>
-                <li class="nav-header">Tutorials</li>
-                <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
-                <li><a href="/users/environment/how-to-build-an-app.html">How to build an app</a></li>
-                <li><a href="/users/environment/classify-a-doc-from-the-shell.html">Building a text classifier in Mahout's Spark Shell</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Algorithms<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li class="nav-header">Distributed Matrix Decomposition</li>
-                <li><a href="/users/algorithms/d-qr.html">Cholesky QR</a></li>
-                <li><a href="/users/algorithms/d-ssvd.html">SSVD</a></li>
-                <li><a href="/users/algorithms/d-als.html">Distributed ALS</a></li>
-                <li><a href="/users/algorithms/d-spca.html">SPCA</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/algorithms/recommender-overview.html">Recommender Overview</a></li>
-                <li><a href="/users/algorithms/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/algorithms/spark-naive-bayes.html">Spark Naive Bayes</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">MapReduce Basics<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li><a href="/users/basics/quickstart.html">Overview</a>
-                <li class="divider"></li>
-                <li class="nav-header">Working with text</li>
-                <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
-                <li><a href="/users/basics/collocations.html">Collocations</a>
-                <li class="divider"></li>
-                <li class="nav-header">Dimensionality reduction</li>
-                <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
-                <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Topic Models</li>
-                <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/classification/bayesian.html">Naive Bayes</a></li>
-                <li><a href="/users/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
-                <li><a href="/users/classification/logistic-regression.html">Logistic Regression (Single Machine)</a></li>
-                <li><a href="/users/classification/partial-implementation.html">Random Forest</a></li>
-                <li class="nav-header">Classification Examples</li>
-                <li><a href="/users/classification/breiman-example.html">Breiman example</a></li>
-                <li><a href="/users/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
-                <li><a href="/users/classification/bankmarketing-example.html">SGD classifier bank marketing</a></li>
-                <li><a href="/users/classification/wikipedia-classifier-example.html">Wikipedia XML parser and classifier</a></li>
-                <li class="nav-header">Clustering</li>
-                <li><a href="/users/clustering/k-means-clustering.html">k-Means</a></li>
-                <li><a href="/users/clustering/canopy-clustering.html">Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
-                <li><a href="/users/clustering/streaming-k-means.html">Streaming KMeans</a></li>
-                <li><a href="/users/clustering/spectral-clustering.html">Spectral Clustering</a></li>
-                <li class="nav-header">Clustering Commandline usage</li>
-                <li><a href="/users/clustering/k-means-commandline.html">Options for k-Means</a></li>
-                <li><a href="/users/clustering/canopy-commandline.html">Options for Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
-                <li class="nav-header">Clustering Examples</li>
-                <li><a href="/users/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
-                <li class="nav-header">Cluster Post processing</li>
-                <li><a href="/users/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
-                <li><a href="/users/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
-                <li><a href="/users/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
-                <li><a href="/users/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
-                <li><a href="/users/recommender/recommender-documentation.html">Overview</a></li>
-                <li><a href="/users/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
-                <li><a href="/users/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
-            </ul>
-        </li>
-        <!--  <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
-          <ul class="dropdown-menu">
-
-          </ul> -->
-        </li>
-    </ul>
-</div><!--/.nav-collapse -->
-  </div><!-- /.container-fluid -->
-</nav>
-
-<body>
-
-<div id="wrap">
-  <body class="">
-
-  <div class="container">
-    <h1 id="creating-vectors-from-text">Creating vectors from text</h1>
-<p><a name="CreatingVectorsfromText-Introduction"></a></p>
-<h1 id="introduction">Introduction</h1>
-
-<p>For clustering and classifying documents it is usually necessary to convert the raw text
-into vectors that can then be consumed by the clustering <a href="algorithms.html">Algorithms</a>.  These approaches are described below.</p>
-
-<p><a name="CreatingVectorsfromText-FromLucene"></a></p>
-<h1 id="from-lucene">From Lucene</h1>
-
-<p><em>NOTE: Your Lucene index must be created with the same version of Lucene
-used in Mahout.  As of Mahout 0.9 this is Lucene 4.6.1. If these versions don’t match you will likely get “Exception in thread “main”
-org.apache.lucene.index.CorruptIndexException: Unknown format version: -11”
-as an error.</em></p>
-
-<p>Mahout has utilities that allow one to easily produce Mahout Vector
-representations from a Lucene (and Solr, since they are the same) index.</p>
-
-<p>For this, we assume you know how to build a Lucene/Solr index.	For those
-who don’t, it is probably easiest to get up and running using <a href="http://lucene.apache.org/solr">Solr</a>
- as it can ingest things like PDFs, XML, Office, etc. and create a Lucene
-index.	For those wanting to use just Lucene, see the <a href="http://lucene.apache.org/core">Lucene website</a>
- or check out <em>Lucene In Action</em> by Erik Hatcher, Otis Gospodnetic and Mike
-McCandless.</p>
-
-<p>To get started, make sure you get a fresh copy of Mahout from <a href="http://mahout.apache.org/developers/buildingmahout.html">GitHub</a>
- and are comfortable building it. It defines interfaces and implementations
-for efficiently iterating over a data source (it only supports Lucene
-currently, but should be extensible to databases, Solr, etc.) and produces
-a Mahout Vector file and term dictionary which can then be used for
-clustering.   The main code for driving this is the driver program located
-in the org.apache.mahout.utils.vectors package.  The driver program offers
-several input options, which can be displayed by specifying the --help
-option.  Examples of running the driver are included below:</p>
-
-<p><a name="CreatingVectorsfromText-GeneratinganoutputfilefromaLuceneIndex"></a></p>
-<h4 id="generating-an-output-file-from-a-lucene-index">Generating an output file from a Lucene Index</h4>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout lucene.vector 
-    --dir (-d) dir                     The Lucene directory      
-    --idField idField                  The field in the index    
-                                           containing the index.  If 
-                                           null, then the Lucene     
-                                           internal doc id is used   
-                                           which is prone to error   
-                                           if the underlying index   
-                                           changes                   
-    --output (-o) output               The output file           
-    --delimiter (-l) delimiter         The delimiter for         
-                                           outputting the dictionary 
-    --help (-h)                        Print out help            
-    --field (-f) field                 The field in the index    
-    --max (-m) max                         The maximum number of     
-                                           vectors to output.  If    
-                                           not specified, then it    
-                                           will loop over all docs   
-    --dictOut (-t) dictOut             The output of the         
-                                           dictionary                
-    --seqDictOut (-st) seqDictOut      The output of the         
-                                           dictionary as sequence    
-                                           file                      
-    --norm (-n) norm                   The norm to use,          
-                                           expressed as either a     
-                                           double or "INF" if you    
-                                           want to use the Infinite  
-                                           norm.  Must be greater or 
-                                           equal to 0.  The default  
-                                           is not to normalize       
-    --maxDFPercent (-x) maxDFPercent   The max percentage of     
-                                           docs for the DF.  Can be  
-                                           used to remove really     
-                                           high frequency terms.     
-                                           Expressed as an integer   
-                                           between 0 and 100.        
-                                           Default is 99.            
-    --weight (-w) weight               The kind of weight to     
-                                           use. Currently TF or      
-                                           TFIDF                     
-    --minDF (-md) minDF                The minimum document      
-                                           frequency.  Default is 1  
-    --maxPercentErrorDocs (-err) mErr  The max percentage of     
-                                           docs that can have a null 
-                                           term vector. These are    
-                                           noise document and can    
-                                           occur if the analyzer     
-                                           used strips out all terms 
-                                           in the target field. This 
-                                           percentage is expressed   
-                                           as a value between 0 and  
-                                           1. The default is 0.  
-</code></pre>
-</div>
-
-<h4 id="example-create-50-vectors-from-an-index">Example: Create 50 Vectors from an Index</h4>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout lucene.vector
-    --dir $WORK_DIR/wikipedia/solr/data/index 
-    --field body 
-    --dictOut $WORK_DIR/solr/wikipedia/dict.txt
-    --output $WORK_DIR/solr/wikipedia/out.txt 
-    --max 50
-</code></pre>
-</div>
-
-<p>This uses the index specified by --dir and the body field in it and writes
-out the info to the output dir and the dictionary to dict.txt.	It only
-outputs 50 vectors.  If you don’t specify --max, then all the documents in
-the index are output.</p>
-
-<p><a name="CreatingVectorsfromText-50VectorsFromLuceneL2Norm"></a></p>
-<h4 id="example-creating-50-normalized-vectors-from-a-lucene-index-using-the-l_2-norm">Example: Creating 50 Normalized Vectors from a Lucene Index using the <a href="http://en.wikipedia.org/wiki/Lp_space">L_2 Norm</a></h4>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout lucene.vector 
-    --dir $WORK_DIR/wikipedia/solr/data/index 
-    --field body 
-    --dictOut $WORK_DIR/solr/wikipedia/dict.txt
-    --output $WORK_DIR/solr/wikipedia/out.txt 
-    --max 50 
-    --norm 2
-</code></pre>
-</div>
-
-<p><a name="CreatingVectorsfromText-FromDirectoryofTextdocuments"></a></p>
-<h2 id="from-a-directory-of-text-documents">From A Directory of Text documents</h2>
-<p>Mahout has utilities to generate Vectors from a directory of text
-documents. Before creating the vectors, you need to convert the documents
-to SequenceFile format. SequenceFile is a hadoop class which allows us to
-write arbitrary (key, value) pairs into it. The DocumentVectorizer requires the
-key to be a Text with a unique document id, and value to be the Text
-content in UTF-8 format.</p>
-
-<p>You may find <a href="http://tika.apache.org/">Tika</a> helpful in converting
-binary documents to text.</p>
-
-<p><a name="CreatingVectorsfromText-ConvertingdirectoryofdocumentstoSequenceFileformat"></a></p>
-<h4 id="converting-directory-of-documents-to-sequencefile-format">Converting directory of documents to SequenceFile format</h4>
-<p>Mahout has a nifty utility which reads a directory path including its
-sub-directories and creates the SequenceFile in a chunked manner for us.</p>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout seqdirectory 
-    --input (-i) input                       Path to job input directory.   
-    --output (-o) output                     The directory pathname for     
-                                                 output.                        
-    --overwrite (-ow)                        If present, overwrite the      
-                                                 output directory before        
-                                                 running job                    
-    --method (-xm) method                    The execution method to use:   
-                                                 sequential or mapreduce.       
-                                                 Default is mapreduce           
-    --chunkSize (-chunk) chunkSize           The chunkSize in MegaBytes.    
-                                                 Defaults to 64                 
-    --fileFilterClass (-filter) fFilterClass The name of the class to use   
-                                                 for file parsing. Default:     
-                                                 org.apache.mahout.text.PrefixAdditionFilter                   
-    --keyPrefix (-prefix) keyPrefix          The prefix to be prepended to  
-                                                 the key                        
-    --charset (-c) charset                   The name of the character      
-                                                 encoding of the input files.   
-                                                 Default to UTF-8 {accepts: cp1252|ascii...}             
-    --method (-xm) method                    The execution method to use:   
-                                                 sequential or mapreduce.       
-                                             Default is mapreduce           
-    --overwrite (-ow)                        If present, overwrite the      
-                                                 output directory before        
-                                                 running job                    
-    --help (-h)                              Print out help                 
-    --tempDir tempDir                        Intermediate output directory  
-    --startPhase startPhase                  First phase to run             
-    --endPhase endPhase                      Last phase to run  
-</code></pre>
-</div>
-
-<p>The output of seqDirectory will be a Sequence file &lt; Text, Text &gt; of all documents (/sub-directory-path/documentFileName, documentText).</p>
-
-<p><a name="CreatingVectorsfromText-CreatingVectorsfromSequenceFile"></a></p>
-<h4 id="creating-vectors-from-sequencefile">Creating Vectors from SequenceFile</h4>
-
-<p>From the sequence file generated from the above step run the following to
-generate vectors.</p>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout seq2sparse
-    --minSupport (-s) minSupport      (Optional) Minimum Support. Default       
-                                          Value: 2                                  
-    --analyzerName (-a) analyzerName  The class name of the analyzer            
-    --chunkSize (-chunk) chunkSize    The chunkSize in MegaBytes. Default       
-                                          Value: 100MB                              
-    --output (-o) output              The directory pathname for output.        
-    --input (-i) input                Path to job input directory.              
-    --minDF (-md) minDF               The minimum document frequency.  Default  
-                                          is 1                                      
-    --maxDFSigma (-xs) maxDFSigma     What portion of the tf (tf-idf) vectors   
-                                          to be used, expressed in times the        
-                                          standard deviation (sigma) of the         
-                                          document frequencies of these vectors.    
-                                          Can be used to remove really high         
-                                          frequency terms. Expressed as a double    
-                                          value. Good value to be specified is 3.0. 
-                                          In case the value is less than 0 no       
-                                          vectors will be filtered out. Default is  
-                                          -1.0.  Overrides maxDFPercent             
-    --maxDFPercent (-x) maxDFPercent  The max percentage of docs for the DF.    
-                                          Can be used to remove really high         
-                                          frequency terms. Expressed as an integer  
-                                          between 0 and 100. Default is 99.  If     
-                                          maxDFSigma is also set, it will override  
-                                          this value.                               
-    --weight (-wt) weight             The kind of weight to use. Currently TF   
-                                          or TFIDF. Default: TFIDF                  
-    --norm (-n) norm                  The norm to use, expressed as either a    
-                                          float or "INF" if you want to use the     
-                                          Infinite norm.  Must be greater or equal  
-                                          to 0.  The default is not to normalize    
-    --minLLR (-ml) minLLR             (Optional)The minimum Log Likelihood      
-                                          Ratio(Float)  Default is 1.0              
-    --numReducers (-nr) numReducers   (Optional) Number of reduce tasks.        
-                                          Default Value: 1                          
-    --maxNGramSize (-ng) ngramSize    (Optional) The maximum size of ngrams to  
-                                          create (2 = bigrams, 3 = trigrams, etc)   
-                                          Default Value:1                           
-    --overwrite (-ow)                 If set, overwrite the output directory    
-    --help (-h)                           Print out help                            
-    --sequentialAccessVector (-seq)   (Optional) Whether output vectors should  
-                                          be SequentialAccessVectors. Default is false;
-                                          true required for running some algorithms
-                                          (LDA,Lanczos)                                
-    --namedVector (-nv)               (Optional) Whether output vectors should  
-                                          be NamedVectors. If set true else false   
-    --logNormalize (-lnorm)           (Optional) Whether output vectors should  
-                                          be logNormalize. If set true else false
-</code></pre>
-</div>
-
-<p>This will create SequenceFiles of tokenized documents &lt; Text, StringTuple &gt;  (docID, tokenizedDoc) and vectorized documents &lt; Text, VectorWritable &gt; (docID, TF-IDF Vector).</p>
-
-<p>As well, seq2sparse will create SequenceFiles for: a dictionary (wordIndex, word), a word frequency count (wordIndex, count) and a document frequency count (wordIndex, DFCount) in the output directory.</p>
-
-<p>The --minSupport option is the min frequency for the word to be considered as a feature; --minDF is the min number of documents the word needs to be in; --maxDFPercent is the max value of the expression (document frequency of a word/total number of documents) to be considered as a good feature to be in the document. These options are helpful in removing high frequency features like stop words.</p>
-
-<p>The vectorized documents can then be used as input to many of Mahout’s classification and clustering algorithms.</p>
-
-<h4 id="example-creating-normalized-tf-idf-vectors-from-a-directory-of-text-documents-using-trigrams-and-the-l_2-norm">Example: Creating Normalized <a href="http://en.wikipedia.org/wiki/Tf%E2%80%93idf">TF-IDF</a> Vectors from a directory of text documents using <a href="http://en.wikipedia.org/wiki/N-gram">trigrams</a> and the <a href="http://en.wikipedia.org/wiki/Lp_space">L_2 Norm</a></h4>
-<p>Create sequence files from the directory of text documents:</p>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout seqdirectory 
-    -i $WORK_DIR/reuters 
-    -o $WORK_DIR/reuters-seqdir 
-    -c UTF-8
-    -chunk 64
-    -xm sequential
-</code></pre>
-</div>
-
-<p>Vectorize the documents using trigrams, L_2 length normalization and a maximum document frequency cutoff of 85%.</p>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>$MAHOUT_HOME/bin/mahout seq2sparse 
-    -i $WORK_DIR/reuters-seqdir/ 
-    -o $WORK_DIR/reuters-out-seqdir-sparse-kmeans 
-    --namedVector
-    -wt tfidf
-    -ng 3
-    -n 2
-    --maxDFPercent 85 
-</code></pre>
-</div>
-
-<p>The sequence file in the $WORK_DIR/reuters-out-seqdir-sparse-kmeans/tfidf-vectors directory can now be used as input to the Mahout <a href="http://mahout.apache.org/users/clustering/k-means-clustering.html">k-Means</a> clustering algorithm.</p>
-
-<p><a name="CreatingVectorsfromText-Background"></a></p>
-<h2 id="background">Background</h2>
-
-<ul>
-  <li><a href="http://markmail.org/thread/l5zi3yk446goll3o">Discussion on centroid calculations with sparse vectors</a></li>
-</ul>
-
-<p><a name="CreatingVectorsfromText-ConvertingexistingvectorstoMahout'sformat"></a></p>
-<h2 id="converting-existing-vectors-to-mahouts-format">Converting existing vectors to Mahout’s format</h2>
-
-<p>If you are in the happy position to already own a document (as in: texts,
-images or whatever item you wish to treat) processing pipeline, the
-question arises of how to convert the vectors into the Mahout vector
-format. Probably the easiest way to go would be to implement your own
-Iterable&lt;Vector&gt; (called VectorIterable in the example below) and then
-reuse the existing VectorWriter classes:</p>
-
-<div class="highlighter-rouge"><pre class="highlight"><code>VectorWriter vectorWriter = SequenceFile.createWriter(filesystem,
-                                                      configuration,
-                                                      outfile,
-                                                      LongWritable.class,
-                                                      SparseVector.class);
-
-long numDocs = vectorWriter.write(new VectorIterable(), Long.MAX_VALUE);
-</code></pre>
-</div>
-
-
-  </div>
-
-
-</div>
-
-<div id="footer">
-  <div class="container">
-    <p>&copy; 2017 The Apache Software Foundation
-      with help from <a href="http://jekyllbootstrap.com" target="_blank" title="The Definitive Jekyll Blogging Framework">Jekyll Bootstrap</a>
-      and <a href="http://getbootstrap.com" target="_blank">Bootstrap</a>
-    </p>
-  </div>
-</div>
-
-
-
-
-
-
-
-<!-- Latest compiled and minified JavaScript, requires jQuery 1.x (2.x not supported in IE8) -->
-<!-- Placed at the end of the document so the pages load faster -->
-<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
-<script src="/assets/themes/mahout3/js/bootstrap.min.js"></script>
-</body>
-</html>
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7c0babd7/website/oldsite/_site/users/basics/creating-vectors.html
----------------------------------------------------------------------
diff --git a/website/oldsite/_site/users/basics/creating-vectors.html b/website/oldsite/_site/users/basics/creating-vectors.html
deleted file mode 100644
index 0206134..0000000
--- a/website/oldsite/_site/users/basics/creating-vectors.html
+++ /dev/null
@@ -1,274 +0,0 @@
-
-
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-
-  <title>Creating Vectors</title>
-  
-  <meta name="author" content="The Apache Software Foundation">
-
-  <!-- Enable responsive viewport -->
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-
-  <!-- Bootstrap styles -->
-  <link href="/assets/themes/mahout3/css/bootstrap.min.css" rel="stylesheet">
-  <!-- Optional theme -->
-  <link href="/assets/themes/mahout3/css/bootstrap-theme.min.css" rel="stylesheet">
-  <!-- Sticky Footer -->
-  <link href="/assets/themes/mahout3/css/bs-sticky-footer.css" rel="stylesheet">
-
-  <!-- Custom styles -->
-  <link href="/assets/themes/mahout3/css/style.css" rel="stylesheet" type="text/css" media="all">
-
-  <!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
-  <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
-  <!--[if lt IE 9]>
-  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
-  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
-  <![endif]-->
-
-  <!-- Fav and touch icons -->
-  <!-- Update these with your own images
-    <link rel="shortcut icon" href="images/favicon.ico">
-    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
-    <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png">
-    <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png">
-  -->
-
-  <!-- atom & rss feed -->
-  <link href="/atom.xml" type="application/atom+xml" rel="alternate" title="Sitewide ATOM Feed">
-  <link href="/rss.xml" type="application/rss+xml" rel="alternate" title="Sitewide RSS Feed">
-  <script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    tex2jax: {
-      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
-    }
-  });
-  MathJax.Hub.Queue(function() {
-    var all = MathJax.Hub.getAllJax(), i;
-    for(i = 0; i < all.length; i += 1) {
-      all[i].SourceElement().parentNode.className += ' has-jax';
-    }
-  });
-  </script>
-  <script type="text/javascript">
-    var mathjax = document.createElement('script');
-    mathjax.type = 'text/javascript';
-    mathjax.async = true;
-
-    mathjax.src = ('https:' == document.location.protocol) ?
-        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' :
-        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
-
-      var s = document.getElementsByTagName('script')[0];
-    s.parentNode.insertBefore(mathjax, s);
-  </script>
-</head>
-
-<nav class="navbar navbar-default navbar-fixed-top">
-  <div class="container-fluid">
-    <!-- Brand and toggle get grouped for better mobile display -->
-    <div class="navbar-header">
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-      <a class="navbar-brand" href="/">
-        <img src="/assets/img/Mahout-logo-82x100.png" height="30" alt="I'm mahout">
-      </a>
-    </div>
-
-    <!--<div class="nav-collapse collapse">-->
-<div class="collapse navbar-collapse" id="main-navbar">
-    <ul class="nav navbar-nav">
-        <!-- <li><a href="/">Home</a></li> -->
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/general/downloads.html">Downloads</a>
-                <li><a href="/general/who-we-are.html">Who we are</a>
-                <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
-                <li><a href="/general/release-notes.html">Release Notes</a>
-                <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
-                <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
-                <li><a href="/general/professional-support.html">Professional Support</a>
-                <li class="divider"></li>
-                <li class="nav-header">Resources</li>
-                <li><a href="/general/reference-reading.html">Reference Reading</a>
-                <li><a href="/general/faq.html">FAQ</a>
-                <li class="divider"></li>
-                <li class="nav-header">Legal</li>
-                <li><a href="http://www.apache.org/licenses/">License</a></li>
-                <li><a href="http://www.apache.org/security/">Security</a></li>
-                <li><a href="/general/privacy-policy.html">Privacy Policy</a>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/developers/developer-resources.html">Developer resources</a></li>
-                <li><a href="/developers/version-control.html">Version control</a></li>
-                <li><a href="/developers/buildingmahout.html">Build from source</a></li>
-                <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
-                <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Contributions</li>
-                <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
-                <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
-                <li><a href="/developers/gsoc.html">GSoC</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">For committers</li>
-                <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
-                <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
-                <li><a href="/developers/github.html">Handling Github PRs</a></li>
-                <li><a href="/developers/how-to-release.html">How to release</a></li>
-                <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout-Samsara<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
-                <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
-                <li><a href="/users/flinkbindings/playing-with-samsara-flink.html">Flink Bindings Overview</a></li>
-                <li class="nav-header">Engines</li>
-                <li><a href="/users/sparkbindings/home.html">Spark</a></li>
-                <li><a href="/users/environment/h2o-internals.html">H2O</a></li>
-                <li><a href="/users/flinkbindings/flink-internals.html">Flink</a></li>
-                <li class="nav-header">References</li>
-                <li><a href="/users/environment/in-core-reference.html">In-Core Algebraic DSL Reference</a></li>
-                <li><a href="/users/environment/out-of-core-reference.html">Distributed Algebraic DSL Reference</a></li>
-                <li class="nav-header">Tutorials</li>
-                <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
-                <li><a href="/users/environment/how-to-build-an-app.html">How to build an app</a></li>
-                <li><a href="/users/environment/classify-a-doc-from-the-shell.html">Building a text classifier in Mahout's Spark Shell</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Algorithms<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li class="nav-header">Distributed Matrix Decomposition</li>
-                <li><a href="/users/algorithms/d-qr.html">Cholesky QR</a></li>
-                <li><a href="/users/algorithms/d-ssvd.html">SSVD</a></li>
-                <li><a href="/users/algorithms/d-als.html">Distributed ALS</a></li>
-                <li><a href="/users/algorithms/d-spca.html">SPCA</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/algorithms/recommender-overview.html">Recommender Overview</a></li>
-                <li><a href="/users/algorithms/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/algorithms/spark-naive-bayes.html">Spark Naive Bayes</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">MapReduce Basics<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li><a href="/users/basics/quickstart.html">Overview</a>
-                <li class="divider"></li>
-                <li class="nav-header">Working with text</li>
-                <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
-                <li><a href="/users/basics/collocations.html">Collocations</a>
-                <li class="divider"></li>
-                <li class="nav-header">Dimensionality reduction</li>
-                <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
-                <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Topic Models</li>
-                <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/classification/bayesian.html">Naive Bayes</a></li>
-                <li><a href="/users/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
-                <li><a href="/users/classification/logistic-regression.html">Logistic Regression (Single Machine)</a></li>
-                <li><a href="/users/classification/partial-implementation.html">Random Forest</a></li>
-                <li class="nav-header">Classification Examples</li>
-                <li><a href="/users/classification/breiman-example.html">Breiman example</a></li>
-                <li><a href="/users/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
-                <li><a href="/users/classification/bankmarketing-example.html">SGD classifier bank marketing</a></li>
-                <li><a href="/users/classification/wikipedia-classifier-example.html">Wikipedia XML parser and classifier</a></li>
-                <li class="nav-header">Clustering</li>
-                <li><a href="/users/clustering/k-means-clustering.html">k-Means</a></li>
-                <li><a href="/users/clustering/canopy-clustering.html">Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
-                <li><a href="/users/clustering/streaming-k-means.html">Streaming KMeans</a></li>
-                <li><a href="/users/clustering/spectral-clustering.html">Spectral Clustering</a></li>
-                <li class="nav-header">Clustering Commandline usage</li>
-                <li><a href="/users/clustering/k-means-commandline.html">Options for k-Means</a></li>
-                <li><a href="/users/clustering/canopy-commandline.html">Options for Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
-                <li class="nav-header">Clustering Examples</li>
-                <li><a href="/users/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
-                <li class="nav-header">Cluster Post processing</li>
-                <li><a href="/users/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
-                <li><a href="/users/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
-                <li><a href="/users/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
-                <li><a href="/users/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
-                <li><a href="/users/recommender/recommender-documentation.html">Overview</a></li>
-                <li><a href="/users/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
-                <li><a href="/users/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
-            </ul>
-        </li>
-        <!--  <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
-          <ul class="dropdown-menu">
-
-          </ul> -->
-        </li>
-    </ul>
-</div><!--/.nav-collapse -->
-  </div><!-- /.container-fluid -->
-</nav>
-
-<body>
-
-<div id="wrap">
-  <body class="">
-
-  <div class="container">
-    <p><a name="CreatingVectors-UtilitiesforCreatingVectors"></a></p>
-<h1 id="utilities-for-creating-vectors">Utilities for Creating Vectors</h1>
-
-<ol>
-  <li>
-    <p><a href="creating-vectors-from-text.html">Text</a> … utilities to turn plain text into Mahout vectors.</p>
-  </li>
-  <li>
-    <p>Mahout also has rudimentary support for the arff file format. See <a href="https://builds.apache.org/job/Mahout-Quality/ws/trunk/integration/target/site/apidocs/org/apache/mahout/utils/vectors/arff/package-summary.html">arff junit doc</a>.</p>
-  </li>
-  <li>
-    <p>There is also support for reading vectors from <a href="https://builds.apache.org/job/Mahout-Quality/ws/trunk/integration/target/site/apidocs/org/apache/mahout/utils/vectors/csv/package-summary.html">csv files</a>.</p>
-  </li>
-</ol>
-
-  </div>
-
-
-</div>
-
-<div id="footer">
-  <div class="container">
-    <p>&copy; 2017 The Apache Software Foundation
-      with help from <a href="http://jekyllbootstrap.com" target="_blank" title="The Definitive Jekyll Blogging Framework">Jekyll Bootstrap</a>
-      and <a href="http://getbootstrap.com" target="_blank">Bootstrap</a>
-    </p>
-  </div>
-</div>
-
-
-
-
-
-
-
-<!-- Latest compiled and minified JavaScript, requires jQuery 1.x (2.x not supported in IE8) -->
-<!-- Placed at the end of the document so the pages load faster -->
-<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
-<script src="/assets/themes/mahout3/js/bootstrap.min.js"></script>
-</body>
-</html>
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7c0babd7/website/oldsite/_site/users/basics/gaussian-discriminative-analysis.html
----------------------------------------------------------------------
diff --git a/website/oldsite/_site/users/basics/gaussian-discriminative-analysis.html b/website/oldsite/_site/users/basics/gaussian-discriminative-analysis.html
deleted file mode 100644
index 0ca73d8..0000000
--- a/website/oldsite/_site/users/basics/gaussian-discriminative-analysis.html
+++ /dev/null
@@ -1,273 +0,0 @@
-
-
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-
-  <title>Gaussian Discriminative Analysis</title>
-  
-  <meta name="author" content="The Apache Software Foundation">
-
-  <!-- Enable responsive viewport -->
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-
-  <!-- Bootstrap styles -->
-  <link href="/assets/themes/mahout3/css/bootstrap.min.css" rel="stylesheet">
-  <!-- Optional theme -->
-  <link href="/assets/themes/mahout3/css/bootstrap-theme.min.css" rel="stylesheet">
-  <!-- Sticky Footer -->
-  <link href="/assets/themes/mahout3/css/bs-sticky-footer.css" rel="stylesheet">
-
-  <!-- Custom styles -->
-  <link href="/assets/themes/mahout3/css/style.css" rel="stylesheet" type="text/css" media="all">
-
-  <!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
-  <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
-  <!--[if lt IE 9]>
-  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
-  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
-  <![endif]-->
-
-  <!-- Fav and touch icons -->
-  <!-- Update these with your own images
-    <link rel="shortcut icon" href="images/favicon.ico">
-    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
-    <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png">
-    <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png">
-  -->
-
-  <!-- atom & rss feed -->
-  <link href="/atom.xml" type="application/atom+xml" rel="alternate" title="Sitewide ATOM Feed">
-  <link href="/rss.xml" type="application/rss+xml" rel="alternate" title="Sitewide RSS Feed">
-  <script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    tex2jax: {
-      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
-    }
-  });
-  MathJax.Hub.Queue(function() {
-    var all = MathJax.Hub.getAllJax(), i;
-    for(i = 0; i < all.length; i += 1) {
-      all[i].SourceElement().parentNode.className += ' has-jax';
-    }
-  });
-  </script>
-  <script type="text/javascript">
-    var mathjax = document.createElement('script');
-    mathjax.type = 'text/javascript';
-    mathjax.async = true;
-
-    mathjax.src = ('https:' == document.location.protocol) ?
-        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' :
-        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
-
-      var s = document.getElementsByTagName('script')[0];
-    s.parentNode.insertBefore(mathjax, s);
-  </script>
-</head>
-
-<nav class="navbar navbar-default navbar-fixed-top">
-  <div class="container-fluid">
-    <!-- Brand and toggle get grouped for better mobile display -->
-    <div class="navbar-header">
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-      <a class="navbar-brand" href="/">
-        <img src="/assets/img/Mahout-logo-82x100.png" height="30" alt="I'm mahout">
-      </a>
-    </div>
-
-    <!--<div class="nav-collapse collapse">-->
-<div class="collapse navbar-collapse" id="main-navbar">
-    <ul class="nav navbar-nav">
-        <!-- <li><a href="/">Home</a></li> -->
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/general/downloads.html">Downloads</a>
-                <li><a href="/general/who-we-are.html">Who we are</a>
-                <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
-                <li><a href="/general/release-notes.html">Release Notes</a>
-                <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
-                <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
-                <li><a href="/general/professional-support.html">Professional Support</a>
-                <li class="divider"></li>
-                <li class="nav-header">Resources</li>
-                <li><a href="/general/reference-reading.html">Reference Reading</a>
-                <li><a href="/general/faq.html">FAQ</a>
-                <li class="divider"></li>
-                <li class="nav-header">Legal</li>
-                <li><a href="http://www.apache.org/licenses/">License</a></li>
-                <li><a href="http://www.apache.org/security/">Security</a></li>
-                <li><a href="/general/privacy-policy.html">Privacy Policy</a>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/developers/developer-resources.html">Developer resources</a></li>
-                <li><a href="/developers/version-control.html">Version control</a></li>
-                <li><a href="/developers/buildingmahout.html">Build from source</a></li>
-                <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
-                <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Contributions</li>
-                <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
-                <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
-                <li><a href="/developers/gsoc.html">GSoC</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">For committers</li>
-                <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
-                <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
-                <li><a href="/developers/github.html">Handling Github PRs</a></li>
-                <li><a href="/developers/how-to-release.html">How to release</a></li>
-                <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout-Samsara<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
-                <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
-                <li><a href="/users/flinkbindings/playing-with-samsara-flink.html">Flink Bindings Overview</a></li>
-                <li class="nav-header">Engines</li>
-                <li><a href="/users/sparkbindings/home.html">Spark</a></li>
-                <li><a href="/users/environment/h2o-internals.html">H2O</a></li>
-                <li><a href="/users/flinkbindings/flink-internals.html">Flink</a></li>
-                <li class="nav-header">References</li>
-                <li><a href="/users/environment/in-core-reference.html">In-Core Algebraic DSL Reference</a></li>
-                <li><a href="/users/environment/out-of-core-reference.html">Distributed Algebraic DSL Reference</a></li>
-                <li class="nav-header">Tutorials</li>
-                <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
-                <li><a href="/users/environment/how-to-build-an-app.html">How to build an app</a></li>
-                <li><a href="/users/environment/classify-a-doc-from-the-shell.html">Building a text classifier in Mahout's Spark Shell</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Algorithms<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li class="nav-header">Distributed Matrix Decomposition</li>
-                <li><a href="/users/algorithms/d-qr.html">Cholesky QR</a></li>
-                <li><a href="/users/algorithms/d-ssvd.html">SSVD</a></li>
-                <li><a href="/users/algorithms/d-als.html">Distributed ALS</a></li>
-                <li><a href="/users/algorithms/d-spca.html">SPCA</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/algorithms/recommender-overview.html">Recommender Overview</a></li>
-                <li><a href="/users/algorithms/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/algorithms/spark-naive-bayes.html">Spark Naive Bayes</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">MapReduce Basics<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li><a href="/users/basics/quickstart.html">Overview</a>
-                <li class="divider"></li>
-                <li class="nav-header">Working with text</li>
-                <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
-                <li><a href="/users/basics/collocations.html">Collocations</a>
-                <li class="divider"></li>
-                <li class="nav-header">Dimensionality reduction</li>
-                <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
-                <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Topic Models</li>
-                <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/classification/bayesian.html">Naive Bayes</a></li>
-                <li><a href="/users/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
-                <li><a href="/users/classification/logistic-regression.html">Logistic Regression (Single Machine)</a></li>
-                <li><a href="/users/classification/partial-implementation.html">Random Forest</a></li>
-                <li class="nav-header">Classification Examples</li>
-                <li><a href="/users/classification/breiman-example.html">Breiman example</a></li>
-                <li><a href="/users/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
-                <li><a href="/users/classification/bankmarketing-example.html">SGD classifier bank marketing</a></li>
-                <li><a href="/users/classification/wikipedia-classifier-example.html">Wikipedia XML parser and classifier</a></li>
-                <li class="nav-header">Clustering</li>
-                <li><a href="/users/clustering/k-means-clustering.html">k-Means</a></li>
-                <li><a href="/users/clustering/canopy-clustering.html">Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
-                <li><a href="/users/clustering/streaming-k-means.html">Streaming KMeans</a></li>
-                <li><a href="/users/clustering/spectral-clustering.html">Spectral Clustering</a></li>
-                <li class="nav-header">Clustering Commandline usage</li>
-                <li><a href="/users/clustering/k-means-commandline.html">Options for k-Means</a></li>
-                <li><a href="/users/clustering/canopy-commandline.html">Options for Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
-                <li class="nav-header">Clustering Examples</li>
-                <li><a href="/users/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
-                <li class="nav-header">Cluster Post processing</li>
-                <li><a href="/users/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
-                <li><a href="/users/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
-                <li><a href="/users/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
-                <li><a href="/users/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
-                <li><a href="/users/recommender/recommender-documentation.html">Overview</a></li>
-                <li><a href="/users/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
-                <li><a href="/users/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
-            </ul>
-        </li>
-        <!--  <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
-          <ul class="dropdown-menu">
-
-          </ul> -->
-        </li>
-    </ul>
-</div><!--/.nav-collapse -->
-  </div><!-- /.container-fluid -->
-</nav>
-
-<body>
-
-<div id="wrap">
-  <body class="">
-
-  <div class="container">
-    <p><a name="GaussianDiscriminativeAnalysis-GaussianDiscriminativeAnalysis"></a></p>
-<h1 id="gaussian-discriminative-analysis">Gaussian Discriminative Analysis</h1>
-
-<p>Gaussian Discriminative Analysis is a tool for multigroup classification
-based on extending linear discriminant analysis. The paper on the approach
-is located at http://citeseer.ist.psu.edu/4617.html (note, for some reason
-the paper is backwards, in that page 1 is at the end)</p>
-
-<p><a name="GaussianDiscriminativeAnalysis-Parallelizationstrategy"></a></p>
-<h2 id="parallelization-strategy">Parallelization strategy</h2>
-
-<p><a name="GaussianDiscriminativeAnalysis-Designofpackages"></a></p>
-<h2 id="design-of-packages">Design of packages</h2>
-
-  </div>
-
-
-</div>
-
-<div id="footer">
-  <div class="container">
-    <p>&copy; 2017 The Apache Software Foundation
-      with help from <a href="http://jekyllbootstrap.com" target="_blank" title="The Definitive Jekyll Blogging Framework">Jekyll Bootstrap</a>
-      and <a href="http://getbootstrap.com" target="_blank">Bootstrap</a>
-    </p>
-  </div>
-</div>
-
-
-
-
-
-
-
-<!-- Latest compiled and minified JavaScript, requires jQuery 1.x (2.x not supported in IE8) -->
-<!-- Placed at the end of the document so the pages load faster -->
-<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
-<script src="/assets/themes/mahout3/js/bootstrap.min.js"></script>
-</body>
-</html>
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7c0babd7/website/oldsite/_site/users/basics/independent-component-analysis.html
----------------------------------------------------------------------
diff --git a/website/oldsite/_site/users/basics/independent-component-analysis.html b/website/oldsite/_site/users/basics/independent-component-analysis.html
deleted file mode 100644
index 55f46c1..0000000
--- a/website/oldsite/_site/users/basics/independent-component-analysis.html
+++ /dev/null
@@ -1,270 +0,0 @@
-
-
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-
-  <title>Independent Component Analysis</title>
-  
-  <meta name="author" content="The Apache Software Foundation">
-
-  <!-- Enable responsive viewport -->
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-
-  <!-- Bootstrap styles -->
-  <link href="/assets/themes/mahout3/css/bootstrap.min.css" rel="stylesheet">
-  <!-- Optional theme -->
-  <link href="/assets/themes/mahout3/css/bootstrap-theme.min.css" rel="stylesheet">
-  <!-- Sticky Footer -->
-  <link href="/assets/themes/mahout3/css/bs-sticky-footer.css" rel="stylesheet">
-
-  <!-- Custom styles -->
-  <link href="/assets/themes/mahout3/css/style.css" rel="stylesheet" type="text/css" media="all">
-
-  <!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
-  <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
-  <!--[if lt IE 9]>
-  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
-  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
-  <![endif]-->
-
-  <!-- Fav and touch icons -->
-  <!-- Update these with your own images
-    <link rel="shortcut icon" href="images/favicon.ico">
-    <link rel="apple-touch-icon" href="images/apple-touch-icon.png">
-    <link rel="apple-touch-icon" sizes="72x72" href="images/apple-touch-icon-72x72.png">
-    <link rel="apple-touch-icon" sizes="114x114" href="images/apple-touch-icon-114x114.png">
-  -->
-
-  <!-- atom & rss feed -->
-  <link href="/atom.xml" type="application/atom+xml" rel="alternate" title="Sitewide ATOM Feed">
-  <link href="/rss.xml" type="application/rss+xml" rel="alternate" title="Sitewide RSS Feed">
-  <script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    tex2jax: {
-      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
-    }
-  });
-  MathJax.Hub.Queue(function() {
-    var all = MathJax.Hub.getAllJax(), i;
-    for(i = 0; i < all.length; i += 1) {
-      all[i].SourceElement().parentNode.className += ' has-jax';
-    }
-  });
-  </script>
-  <script type="text/javascript">
-    var mathjax = document.createElement('script');
-    mathjax.type = 'text/javascript';
-    mathjax.async = true;
-
-    mathjax.src = ('https:' == document.location.protocol) ?
-        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' :
-        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
-
-      var s = document.getElementsByTagName('script')[0];
-    s.parentNode.insertBefore(mathjax, s);
-  </script>
-</head>
-
-<nav class="navbar navbar-default navbar-fixed-top">
-  <div class="container-fluid">
-    <!-- Brand and toggle get grouped for better mobile display -->
-    <div class="navbar-header">
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-      <a class="navbar-brand" href="/">
-        <img src="/assets/img/Mahout-logo-82x100.png" height="30" alt="I'm mahout">
-      </a>
-    </div>
-
-    <!--<div class="nav-collapse collapse">-->
-<div class="collapse navbar-collapse" id="main-navbar">
-    <ul class="nav navbar-nav">
-        <!-- <li><a href="/">Home</a></li> -->
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/general/downloads.html">Downloads</a>
-                <li><a href="/general/who-we-are.html">Who we are</a>
-                <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
-                <li><a href="/general/release-notes.html">Release Notes</a>
-                <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
-                <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
-                <li><a href="/general/professional-support.html">Professional Support</a>
-                <li class="divider"></li>
-                <li class="nav-header">Resources</li>
-                <li><a href="/general/reference-reading.html">Reference Reading</a>
-                <li><a href="/general/faq.html">FAQ</a>
-                <li class="divider"></li>
-                <li class="nav-header">Legal</li>
-                <li><a href="http://www.apache.org/licenses/">License</a></li>
-                <li><a href="http://www.apache.org/security/">Security</a></li>
-                <li><a href="/general/privacy-policy.html">Privacy Policy</a>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/developers/developer-resources.html">Developer resources</a></li>
-                <li><a href="/developers/version-control.html">Version control</a></li>
-                <li><a href="/developers/buildingmahout.html">Build from source</a></li>
-                <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
-                <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Contributions</li>
-                <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
-                <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
-                <li><a href="/developers/gsoc.html">GSoC</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">For committers</li>
-                <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
-                <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
-                <li><a href="/developers/github.html">Handling Github PRs</a></li>
-                <li><a href="/developers/how-to-release.html">How to release</a></li>
-                <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout-Samsara<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
-                <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
-                <li><a href="/users/flinkbindings/playing-with-samsara-flink.html">Flink Bindings Overview</a></li>
-                <li class="nav-header">Engines</li>
-                <li><a href="/users/sparkbindings/home.html">Spark</a></li>
-                <li><a href="/users/environment/h2o-internals.html">H2O</a></li>
-                <li><a href="/users/flinkbindings/flink-internals.html">Flink</a></li>
-                <li class="nav-header">References</li>
-                <li><a href="/users/environment/in-core-reference.html">In-Core Algebraic DSL Reference</a></li>
-                <li><a href="/users/environment/out-of-core-reference.html">Distributed Algebraic DSL Reference</a></li>
-                <li class="nav-header">Tutorials</li>
-                <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
-                <li><a href="/users/environment/how-to-build-an-app.html">How to build an app</a></li>
-                <li><a href="/users/environment/classify-a-doc-from-the-shell.html">Building a text classifier in Mahout's Spark Shell</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Algorithms<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li class="nav-header">Distributed Matrix Decomposition</li>
-                <li><a href="/users/algorithms/d-qr.html">Cholesky QR</a></li>
-                <li><a href="/users/algorithms/d-ssvd.html">SSVD</a></li>
-                <li><a href="/users/algorithms/d-als.html">Distributed ALS</a></li>
-                <li><a href="/users/algorithms/d-spca.html">SPCA</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/algorithms/recommender-overview.html">Recommender Overview</a></li>
-                <li><a href="/users/algorithms/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/algorithms/spark-naive-bayes.html">Spark Naive Bayes</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">MapReduce Basics<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li><a href="/users/basics/algorithms.html">List of algorithms</a>
-                <li><a href="/users/basics/quickstart.html">Overview</a>
-                <li class="divider"></li>
-                <li class="nav-header">Working with text</li>
-                <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
-                <li><a href="/users/basics/collocations.html">Collocations</a>
-                <li class="divider"></li>
-                <li class="nav-header">Dimensionality reduction</li>
-                <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
-                <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
-                <li class="divider"></li>
-                <li class="nav-header">Topic Models</li>
-                <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
-            </ul>
-        </li>
-        <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Mahout MapReduce<b class="caret"></b></a>
-            <ul class="dropdown-menu">
-                <li class="nav-header">Classification</li>
-                <li><a href="/users/classification/bayesian.html">Naive Bayes</a></li>
-                <li><a href="/users/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
-                <li><a href="/users/classification/logistic-regression.html">Logistic Regression (Single Machine)</a></li>
-                <li><a href="/users/classification/partial-implementation.html">Random Forest</a></li>
-                <li class="nav-header">Classification Examples</li>
-                <li><a href="/users/classification/breiman-example.html">Breiman example</a></li>
-                <li><a href="/users/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
-                <li><a href="/users/classification/bankmarketing-example.html">SGD classifier bank marketing</a></li>
-                <li><a href="/users/classification/wikipedia-classifier-example.html">Wikipedia XML parser and classifier</a></li>
-                <li class="nav-header">Clustering</li>
-                <li><a href="/users/clustering/k-means-clustering.html">k-Means</a></li>
-                <li><a href="/users/clustering/canopy-clustering.html">Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
-                <li><a href="/users/clustering/streaming-k-means.html">Streaming KMeans</a></li>
-                <li><a href="/users/clustering/spectral-clustering.html">Spectral Clustering</a></li>
-                <li class="nav-header">Clustering Commandline usage</li>
-                <li><a href="/users/clustering/k-means-commandline.html">Options for k-Means</a></li>
-                <li><a href="/users/clustering/canopy-commandline.html">Options for Canopy</a></li>
-                <li><a href="/users/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
-                <li class="nav-header">Clustering Examples</li>
-                <li><a href="/users/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
-                <li class="nav-header">Cluster Post processing</li>
-                <li><a href="/users/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
-                <li><a href="/users/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
-                <li class="nav-header">Recommendations</li>
-                <li><a href="/users/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
-                <li><a href="/users/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
-                <li><a href="/users/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
-                <li><a href="/users/recommender/recommender-documentation.html">Overview</a></li>
-                <li><a href="/users/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
-                <li><a href="/users/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
-            </ul>
-        </li>
-        <!--  <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
-          <ul class="dropdown-menu">
-
-          </ul> -->
-        </li>
-    </ul>
-</div><!--/.nav-collapse -->
-  </div><!-- /.container-fluid -->
-</nav>
-
-<body>
-
-<div id="wrap">
-  <body class="">
-
-  <div class="container">
-    <p><a name="IndependentComponentAnalysis-IndependentComponentAnalysis"></a></p>
-<h1 id="independent-component-analysis">Independent Component Analysis</h1>
-
-<p>See also: Principal Component Analysis.</p>
-
-<p><a name="IndependentComponentAnalysis-Parallelizationstrategy"></a></p>
-<h2 id="parallelization-strategy">Parallelization strategy</h2>
-
-<p><a name="IndependentComponentAnalysis-Designofpackages"></a></p>
-<h2 id="design-of-packages">Design of packages</h2>
-
-  </div>
-
-
-</div>
-
-<div id="footer">
-  <div class="container">
-    <p>&copy; 2017 The Apache Software Foundation
-      with help from <a href="http://jekyllbootstrap.com" target="_blank" title="The Definitive Jekyll Blogging Framework">Jekyll Bootstrap</a>
-      and <a href="http://getbootstrap.com" target="_blank">Bootstrap</a>
-    </p>
-  </div>
-</div>
-
-
-
-
-
-
-
-<!-- Latest compiled and minified JavaScript, requires jQuery 1.x (2.x not supported in IE8) -->
-<!-- Placed at the end of the document so the pages load faster -->
-<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
-<script src="/assets/themes/mahout3/js/bootstrap.min.js"></script>
-</body>
-</html>
-


Mime
View raw message