mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From build...@apache.org
Subject svn commit: r944380 [15/24] - in /websites/staging/mahout/trunk/content: ./ developers/ general/ users/basics/ users/classification/ users/clustering/ users/dim-reduction/ users/mapreduce/ users/mapreduce/classification/ users/mapreduce/clustering/ use...
Date Thu, 19 Mar 2015 21:21:47 GMT
Added: websites/staging/mahout/trunk/content/users/mapreduce/clustering/fuzzy-k-means-commandline.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/mapreduce/clustering/fuzzy-k-means-commandline.html (added)
+++ websites/staging/mahout/trunk/content/users/mapreduce/clustering/fuzzy-k-means-commandline.html Thu Mar 19 21:21:45 2015
@@ -0,0 +1,366 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="http://mahout.apache.org/images/favicon.ico">
+  <script type="text/javascript" src="/js/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    tex2jax: {
+      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
+    }
+  });
+  MathJax.Hub.Queue(function() {
+    var all = MathJax.Hub.getAllJax(), i;
+    for(i = 0; i < all.length; i += 1) {
+      all[i].SourceElement().parentNode.className += ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    var mathjax = document.createElement('script'); 
+    mathjax.type = 'text/javascript'; 
+    mathjax.async = true;
+
+    mathjax.src = ('https:' == document.location.protocol) ?
+        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' : 
+        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+	
+	  var s = document.getElementsByTagName('script')[0]; 
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/overview.html"></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="/">Home</a></li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a>
+                  <li><a href="/general/who-we-are.html">Who we are</a>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
+                  <li><a href="/general/release-notes.html">Release Notes</a> 
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
+                  <li><a href="/general/professional-support.html">Professional Support</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a>
+                  <li><a href="/general/faq.html">FAQ</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li><a href="/users/basics/quickstart.html">Quickstart</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
+                  <li><a href="/users/basics/collocations.html">Collocations</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+                 </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Spark<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+			      <li class="divider"></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+                </ul>
+               </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Classification<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/mapreduce/classification/logistic-regression.html">Logistic Regression</a></li>
+                  <li><a href="/users/mapreduce/classification/partial-implementation.html">Random Forest</a></li>
+
+                  <li class="divider"></li>
+                  <li class="nav-header">Examples</li>
+                  <li><a href="/users/mapreduce/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                </ul></li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Clustering<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                <li><a href="/users/mapreduce/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Commandline usage</li>
+                <li><a href="/users/mapreduce/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Examples</li>
+                <li><a href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Post processing</li>
+                <li><a href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                <li><a href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                </ul></li>
+                <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                <li><a href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		<li><a href="/users/mapreduce/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Hadoop</li>
+                <li><a href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                <li><a href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+                <li class="nav-header">Spark</li>
+                <li><a href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+              </ul>
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Hadoop</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <p><a name="fuzzy-k-means-commandline-RunningFuzzyk-MeansClusteringfromtheCommandLine"></a></p>
+<h1 id="running-fuzzy-k-means-clustering-from-the-command-line">Running Fuzzy k-Means Clustering from the Command Line</h1>
+<p>Mahout's Fuzzy k-Means clustering can be launched from the same command
+line invocation whether you are running on a single machine in stand-alone
+mode or on a larger Hadoop cluster. The difference is determined by the
+$HADOOP_HOME and $HADOOP_CONF_DIR environment variables. If both are set to
+an operating Hadoop cluster on the target machine then the invocation will
+run FuzzyK on that cluster. If either of the environment variables is
+missing then the stand-alone Hadoop configuration will be invoked instead.</p>
+<div class="codehilite"><pre><span class="o">./</span><span class="n">bin</span><span class="o">/</span><span class="n">mahout</span> <span class="n">fkmeans</span> <span class="o">&lt;</span><span class="n">OPTIONS</span><span class="o">&gt;</span>
+</pre></div>
+
+
+<ul>
+<li>In $MAHOUT_HOME/, build the jar containing the job (mvn install). The job
+will be generated in $MAHOUT_HOME/core/target/ and its name will contain
+the Mahout version number. For example, when using Mahout 0.3 release, the
+job will be mahout-core-0.3.job</li>
+</ul>
+<p><a name="fuzzy-k-means-commandline-Testingitononesinglemachinew/ocluster"></a></p>
+<h2 id="testing-it-on-one-single-machine-wo-cluster">Testing it on one single machine w/o cluster</h2>
+<ul>
+<li>Put the data: cp &lt;PATH TO DATA&gt; testdata</li>
+<li>
+<p>Run the Job: </p>
+<p>./bin/mahout fkmeans -i testdata &lt;OPTIONS&gt;</p>
+</li>
+</ul>
+<p><a name="fuzzy-k-means-commandline-Runningitonthecluster"></a></p>
+<h2 id="running-it-on-the-cluster">Running it on the cluster</h2>
+<ul>
+<li>(As needed) Start up Hadoop: $HADOOP_HOME/bin/start-all.sh</li>
+<li>Put the data: $HADOOP_HOME/bin/hadoop fs -put &lt;PATH TO DATA&gt; testdata</li>
+<li>
+<p>Run the Job: </p>
+<p>export HADOOP_HOME=&lt;Hadoop Home Directory&gt;
+export HADOOP_CONF_DIR=$HADOOP_HOME/conf
+./bin/mahout fkmeans -i testdata &lt;OPTIONS&gt;</p>
+</li>
+<li>
+<p>Get the data out of HDFS and have a look. Use bin/hadoop fs -lsr output
+to view all outputs.</p>
+</li>
+</ul>
+<p><a name="fuzzy-k-means-commandline-Commandlineoptions"></a></p>
+<h1 id="command-line-options">Command line options</h1>
+<div class="codehilite"><pre>  <span class="o">--</span><span class="n">input</span> <span class="p">(</span><span class="o">-</span><span class="nb">i</span><span class="p">)</span> <span class="n">input</span>                   <span class="n">Path</span> <span class="n">to</span> <span class="n">job</span> <span class="n">input</span> <span class="n">directory</span><span class="p">.</span> 
+                           <span class="n">Must</span> <span class="n">be</span> <span class="n">a</span> <span class="n">SequenceFile</span> <span class="n">of</span>    
+                           <span class="n">VectorWritable</span>           
+  <span class="o">--</span><span class="n">clusters</span> <span class="p">(</span><span class="o">-</span><span class="n">c</span><span class="p">)</span> <span class="n">clusters</span>             <span class="n">The</span> <span class="n">input</span> <span class="n">centroids</span><span class="p">,</span> <span class="n">as</span> <span class="n">Vectors</span><span class="p">.</span> 
+                           <span class="n">Must</span> <span class="n">be</span> <span class="n">a</span> <span class="n">SequenceFile</span> <span class="n">of</span>    
+                           <span class="n">Writable</span><span class="p">,</span> <span class="n">Cluster</span><span class="o">/</span><span class="n">Canopy</span><span class="p">.</span> <span class="n">If</span> <span class="n">k</span>  
+                           <span class="n">is</span> <span class="n">also</span> <span class="n">specified</span><span class="p">,</span> <span class="n">then</span> <span class="n">a</span> <span class="n">random</span> 
+                           <span class="n">set</span> <span class="n">of</span> <span class="n">vectors</span> <span class="n">will</span> <span class="n">be</span> <span class="n">selected</span>  
+                           <span class="n">and</span> <span class="n">written</span> <span class="n">out</span> <span class="n">to</span> <span class="n">this</span> <span class="n">path</span> 
+                           <span class="n">first</span>                
+  <span class="o">--</span><span class="n">output</span> <span class="p">(</span><span class="o">-</span><span class="n">o</span><span class="p">)</span> <span class="n">output</span>                 <span class="n">The</span> <span class="n">directory</span> <span class="n">pathname</span> <span class="k">for</span>   
+                           <span class="n">output</span><span class="p">.</span>              
+  <span class="o">--</span><span class="n">distanceMeasure</span> <span class="p">(</span><span class="o">-</span><span class="n">dm</span><span class="p">)</span> <span class="n">distanceMeasure</span>      <span class="n">The</span> <span class="n">classname</span> <span class="n">of</span> <span class="n">the</span>     
+                           <span class="n">DistanceMeasure</span><span class="p">.</span> <span class="n">Default</span> <span class="n">is</span>  
+                           <span class="n">SquaredEuclidean</span>         
+  <span class="o">--</span><span class="n">convergenceDelta</span> <span class="p">(</span><span class="o">-</span><span class="n">cd</span><span class="p">)</span> <span class="n">convergenceDelta</span>    <span class="n">The</span> <span class="n">convergence</span> <span class="n">delta</span> <span class="n">value</span><span class="p">.</span> 
+                           <span class="n">Default</span> <span class="n">is</span> 0<span class="p">.</span>5           
+  <span class="o">--</span><span class="n">maxIter</span> <span class="p">(</span><span class="o">-</span><span class="n">x</span><span class="p">)</span> <span class="n">maxIter</span>               <span class="n">The</span> <span class="n">maximum</span> <span class="n">number</span> <span class="n">of</span>        
+                           <span class="n">iterations</span><span class="p">.</span>          
+  <span class="o">--</span><span class="n">k</span> <span class="p">(</span><span class="o">-</span><span class="n">k</span><span class="p">)</span> <span class="n">k</span>                       <span class="n">The</span> <span class="n">k</span> <span class="n">in</span> <span class="n">k</span><span class="o">-</span><span class="n">Means</span><span class="p">.</span>  <span class="n">If</span> <span class="n">specified</span><span class="p">,</span> 
+                           <span class="n">then</span> <span class="n">a</span> <span class="n">random</span> <span class="n">selection</span> <span class="n">of</span> <span class="n">k</span> 
+                           <span class="n">Vectors</span> <span class="n">will</span> <span class="n">be</span> <span class="n">chosen</span> <span class="n">as</span> <span class="n">the</span>
+                               <span class="n">Centroid</span> <span class="n">and</span> <span class="n">written</span> <span class="n">to</span> <span class="n">the</span>  
+                           <span class="n">clusters</span> <span class="n">input</span> <span class="n">path</span><span class="p">.</span>     
+  <span class="o">--</span><span class="n">m</span> <span class="p">(</span><span class="o">-</span><span class="n">m</span><span class="p">)</span> <span class="n">m</span>                       <span class="n">coefficient</span> <span class="n">normalization</span>    
+                           <span class="nb">factor</span><span class="p">,</span> <span class="n">must</span> <span class="n">be</span> <span class="n">greater</span> <span class="n">than</span> 1   
+  <span class="o">--</span><span class="n">overwrite</span> <span class="p">(</span><span class="o">-</span><span class="n">ow</span><span class="p">)</span>                <span class="n">If</span> <span class="n">present</span><span class="p">,</span> <span class="n">overwrite</span> <span class="n">the</span> <span class="n">output</span> 
+                           <span class="n">directory</span> <span class="n">before</span> <span class="n">running</span> <span class="n">job</span> 
+  <span class="o">--</span><span class="n">help</span> <span class="p">(</span><span class="o">-</span><span class="n">h</span><span class="p">)</span>                      <span class="n">Print</span> <span class="n">out</span> <span class="n">help</span>           
+  <span class="o">--</span><span class="n">numMap</span> <span class="p">(</span><span class="o">-</span><span class="n">u</span><span class="p">)</span> <span class="n">numMap</span>                 <span class="n">The</span> <span class="n">number</span> <span class="n">of</span> <span class="n">map</span> <span class="n">tasks</span><span class="p">.</span>     
+                           <span class="n">Defaults</span> <span class="n">to</span> 10           
+  <span class="o">--</span><span class="n">maxRed</span> <span class="p">(</span><span class="o">-</span><span class="n">r</span><span class="p">)</span> <span class="n">maxRed</span>                 <span class="n">The</span> <span class="n">number</span> <span class="n">of</span> <span class="n">reduce</span> <span class="n">tasks</span><span class="p">.</span>  
+                           <span class="n">Defaults</span> <span class="n">to</span> 2            
+  <span class="o">--</span><span class="n">emitMostLikely</span> <span class="p">(</span><span class="o">-</span><span class="n">e</span><span class="p">)</span> <span class="n">emitMostLikely</span>         <span class="n">True</span> <span class="k">if</span> <span class="n">clustering</span> <span class="n">should</span> <span class="n">emit</span>   
+                           <span class="n">the</span> <span class="n">most</span> <span class="n">likely</span> <span class="n">point</span> <span class="n">only</span><span class="p">,</span>  
+                           <span class="n">false</span> <span class="k">for</span> <span class="n">threshold</span> <span class="n">clustering</span><span class="p">.</span>  
+                           <span class="n">Default</span> <span class="n">is</span> <span class="n">true</span>          
+  <span class="o">--</span><span class="n">threshold</span> <span class="p">(</span><span class="o">-</span><span class="n">t</span><span class="p">)</span> <span class="n">threshold</span>               <span class="n">The</span> <span class="n">pdf</span> <span class="n">threshold</span> <span class="n">used</span> <span class="k">for</span>   
+                           <span class="n">cluster</span> <span class="n">determination</span><span class="p">.</span> <span class="n">Default</span>   
+                           <span class="n">is</span> 0 
+  <span class="o">--</span><span class="n">clustering</span> <span class="p">(</span><span class="o">-</span><span class="n">cl</span><span class="p">)</span>                   <span class="n">If</span> <span class="n">present</span><span class="p">,</span> <span class="n">run</span> <span class="n">clustering</span> <span class="n">after</span> 
+                           <span class="n">the</span> <span class="n">iterations</span> <span class="n">have</span> <span class="n">taken</span> <span class="n">place</span>
+</pre></div>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() {
+      var cx = '012254517474945470291:vhsfv7eokdc';
+      var gcse = document.createElement('script');
+      gcse.type = 'text/javascript';
+      gcse.async = true;
+      gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
+          '//www.google.com/cse/cse.js?cx=' + cx;
+      var s = document.getElementsByTagName('script')[0];
+      s.parentNode.insertBefore(gcse, s);
+    })();
+  </script>
+</body>
+</html>

Added: websites/staging/mahout/trunk/content/users/mapreduce/clustering/fuzzy-k-means.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/mapreduce/clustering/fuzzy-k-means.html (added)
+++ websites/staging/mahout/trunk/content/users/mapreduce/clustering/fuzzy-k-means.html Thu Mar 19 21:21:45 2015
@@ -0,0 +1,434 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="http://mahout.apache.org/images/favicon.ico">
+  <script type="text/javascript" src="/js/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  // tex2jax options: the listed tags are passed as skipTags.
+  MathJax.Hub.Config({
+    tex2jax: { skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] }
+  });
+  // Queued callback: append the 'has-jax' class to the parent of every jax source element.
+  MathJax.Hub.Queue(function() {
+    var jaxList = MathJax.Hub.getAllJax();
+    for (var idx = 0; idx < jaxList.length; idx++) {
+      jaxList[idx].SourceElement().parentNode.className += ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    // Load MathJax asynchronously. The original MathJax CDN (cdn.mathjax.org and its
+    // Rackspace HTTPS mirror) was shut down in 2017, so use a pinned 2.x release on cdnjs.
+    var mathjax = document.createElement('script');
+    mathjax.type = 'text/javascript';
+    mathjax.async = true;
+    mathjax.src =
+        'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+    // Insert ahead of the first script element already in the document.
+    var s = document.getElementsByTagName('script')[0];
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/overview.html"></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="/">Home</a></li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a>
+                  <li><a href="/general/who-we-are.html">Who we are</a>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
+                  <li><a href="/general/release-notes.html">Release Notes</a> 
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
+                  <li><a href="/general/professional-support.html">Professional Support</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a>
+                  <li><a href="/general/faq.html">FAQ</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li><a href="/users/basics/quickstart.html">Quickstart</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
+                  <li><a href="/users/basics/collocations.html">Collocations</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+                 </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Spark<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+			      <li class="divider"></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+                </ul>
+               </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Classification<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/mapreduce/classification/logistic-regression.html">Logistic Regression</a></li>
+                  <li><a href="/users/mapreduce/classification/partial-implementation.html">Random Forest</a></li>
+
+                  <li class="divider"></li>
+                  <li class="nav-header">Examples</li>
+                  <li><a href="/users/mapreduce/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                </ul></li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Clustering<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                <li><a href="/users/mapreduce/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Commandline usage</li>
+                <li><a href="/users/mapreduce/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Examples</li>
+                <li><a href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Post processing</li>
+                <li><a href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                <li><a href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                </ul></li>
+                <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                <li><a href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		<li><a href="/users/mapreduce/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Hadoop</li>
+                <li><a href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                <li><a href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+                <li class="nav-header">Spark</li>
+                <li><a href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+              </ul>
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>/* Vendor embed: inserts platform.twitter.com/widgets.js before the first script element, unless an element with id "twitter-wjs" already exists; uses http only when the page URL starts with "http:", otherwise https. */!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Hadoop</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <h1 id="fuzzy-k-means">Fuzzy K-Means</h1>
+<p>Fuzzy K-Means (also called Fuzzy C-Means) is an extension of <a href="http://mahout.apache.org/users/clustering/k-means-clustering.html">K-Means</a>,
+the popular simple clustering technique. While K-Means discovers hard
+clusters (a point belongs to only one cluster), Fuzzy K-Means is a more
+statistically formalized method and discovers soft clusters, where a
+particular point can belong to more than one cluster with a certain
+probability.</p>
+<p><a name="FuzzyK-Means-Algorithm"></a></p>
+<h4 id="algorithm">Algorithm</h4>
+<p>Like K-Means, Fuzzy K-Means works on objects that can be represented
+in an n-dimensional vector space and for which a distance measure is defined.
+The algorithm is similar to k-means.</p>
+<ul>
+<li>Initialize k clusters</li>
+<li>Until converged<ul>
+<li>Compute the probability of a point belonging to a cluster for every &lt;point,cluster&gt; pair</li>
+<li>Recompute the cluster centers using the above probability membership values of points to clusters</li>
+</ul>
+</li>
+</ul>
+<p><a name="FuzzyK-Means-DesignImplementation"></a></p>
+<h4 id="design-implementation">Design Implementation</h4>
+<p>The design is similar to the K-Means implementation present in Mahout. It accepts an input
+file containing vector points. The user can either provide the cluster centers
+as input or allow the canopy algorithm to run and create the initial clusters.</p>
+<p>Similar to K-Means, the program doesn't modify the input directories, and
+for every iteration the cluster output is stored in a directory cluster-N.
+The code sets the number of reduce tasks equal to the number of map tasks, so
+that many part-0 files
+are created in the cluster-N directory.</p>
+<p>The code uses driver/mapper/combiner/reducer as follows:</p>
+<p>FuzzyKMeansDriver - This is similar to KMeansDriver. It iterates over
+input points and cluster points for the specified number of iterations or until
+it has converged. During every iteration i, a new cluster-i directory is
+created which contains the modified cluster centers obtained during
+the FuzzyKMeans iteration. These are fed as input clusters to the next
+iteration. Once Fuzzy KMeans has run for the specified number of
+iterations or until it has converged, a map task is run to output "the point
+and the cluster membership to each cluster" pair as final output to a
+directory named "points".</p>
+<p>FuzzyKMeansMapper - reads the input cluster during its configure() method,
+then computes the cluster membership probability of a point to each
+cluster. Cluster membership is inversely proportional to the distance.
+Distance is computed using the user-supplied distance measure. Output key
+is encoded clusterId. Output values are ClusterObservations containing
+observation statistics.</p>
+<p>FuzzyKMeansCombiner - receives all key:value pairs from the mapper and
+produces partial sums of the cluster membership probability times input
+vectors for each cluster. Output key is: encoded cluster identifier. Output
+values are ClusterObservations containing observation statistics.</p>
+<p>FuzzyKMeansReducer - Multiple reducers receive certain keys and all values
+associated with those keys. The reducer sums the values to produce a new
+centroid for the cluster which is output. Output key is: encoded cluster
+identifier (e.g. "C14"). Output value is: formatted cluster identifier (e.g.
+"C14"). The reducer encodes unconverged clusters with a 'Cn' cluster Id and
+converged clusters with 'Vn' clusterId.</p>
+<p><a name="FuzzyK-Means-RunningFuzzyk-MeansClustering"></a></p>
+<h2 id="running-fuzzy-k-means-clustering">Running Fuzzy k-Means Clustering</h2>
+<p>The Fuzzy k-Means clustering algorithm may be run using a command-line
+invocation on FuzzyKMeansDriver.main or by making a Java call to
+FuzzyKMeansDriver.run(). </p>
+<p>Invocation using the command line takes the form:</p>
+<div class="codehilite"><pre><span class="n">bin</span><span class="o">/</span><span class="n">mahout</span> <span class="n">fkmeans</span> <span class="o">\</span>
+    <span class="o">-</span><span class="nb">i</span> <span class="o">&lt;</span><span class="n">input</span> <span class="n">vectors</span> <span class="n">directory</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">c</span> <span class="o">&lt;</span><span class="n">input</span> <span class="n">clusters</span> <span class="n">directory</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">o</span> <span class="o">&lt;</span><span class="n">output</span> <span class="n">working</span> <span class="n">directory</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">dm</span> <span class="o">&lt;</span><span class="n">DistanceMeasure</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">m</span> <span class="o">&lt;</span><span class="n">fuzziness</span> <span class="n">argument</span> <span class="o">&gt;</span>1<span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">x</span> <span class="o">&lt;</span><span class="n">maximum</span> <span class="n">number</span> <span class="n">of</span> <span class="n">iterations</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">k</span> <span class="o">&lt;</span><span class="n">optional</span> <span class="n">number</span> <span class="n">of</span> <span class="n">initial</span> <span class="n">clusters</span> <span class="n">to</span> <span class="n">sample</span> <span class="n">from</span> <span class="n">input</span> <span class="n">vectors</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">cd</span> <span class="o">&lt;</span><span class="n">optional</span> <span class="n">convergence</span> <span class="n">delta</span><span class="p">.</span> <span class="n">Default</span> <span class="n">is</span> 0<span class="p">.</span>5<span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">ow</span> <span class="o">&lt;</span><span class="n">overwrite</span> <span class="n">output</span> <span class="n">directory</span> <span class="k">if</span> <span class="n">present</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">cl</span> <span class="o">&lt;</span><span class="n">run</span> <span class="n">input</span> <span class="n">vector</span> <span class="n">clustering</span> <span class="n">after</span> <span class="n">computing</span> <span class="n">Clusters</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">e</span> <span class="o">&lt;</span><span class="n">emit</span> <span class="n">vectors</span> <span class="n">to</span> <span class="n">most</span> <span class="n">likely</span> <span class="n">cluster</span> <span class="n">during</span> <span class="n">clustering</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">t</span> <span class="o">&lt;</span><span class="n">threshold</span> <span class="n">to</span> <span class="n">use</span> <span class="k">for</span> <span class="n">clustering</span> <span class="k">if</span> <span class="o">-</span><span class="n">e</span> <span class="n">is</span> <span class="n">false</span><span class="o">&gt;</span> <span class="o">\</span>
+    <span class="o">-</span><span class="n">xm</span> <span class="o">&lt;</span><span class="n">execution</span> <span class="n">method</span><span class="p">:</span> <span class="n">sequential</span> <span class="n">or</span> <span class="n">mapreduce</span><span class="o">&gt;</span>
+</pre></div>
+
+
+<p><em>Note:</em> if the -k argument is supplied, any clusters in the -c directory
+will be overwritten and -k random points will be sampled from the input
+vectors to become the initial cluster centers.</p>
+<p>Invocation using Java involves supplying the following arguments:</p>
+<ol>
+<li>input: a file path string to a directory containing the input data set, a
+SequenceFile(WritableComparable, VectorWritable). The sequence file <em>key</em>
+is not used.</li>
+<li>clustersIn: a file path string to a directory containing the initial
+clusters, a SequenceFile(key, SoftCluster | Cluster | Canopy). Fuzzy
+k-Means SoftClusters, k-Means Clusters and Canopy Canopies may be used for
+the initial clusters.</li>
+<li>output: a file path string to an empty directory which is used for all
+output from the algorithm.</li>
+<li>measure: the fully-qualified class name of an instance of DistanceMeasure
+which will be used for the clustering.</li>
+<li>convergence: a double value used to determine if the algorithm has
+converged (clusters have not moved more than the value in the last
+iteration)</li>
+<li>max-iterations: the maximum number of iterations to run, independent of
+the convergence specified</li>
+<li>m: the "fuzziness" argument, a double &gt; 1. For m equal to 2, this is
+equivalent to normalising the coefficients linearly to make their sum 1.
+When m is close to 1, the cluster center closest to the point is given
+much more weight than the others, and the algorithm is similar to k-means.</li>
+<li>runClustering: a boolean indicating, if true, that the clustering step is
+to be executed after clusters have been determined.</li>
+<li>emitMostLikely: a boolean indicating, if true, that the clustering step
+should only emit the most likely cluster for each clustered point.</li>
+<li>threshold: a double indicating, if emitMostLikely is false, the cluster
+probability threshold used for emitting multiple clusters for each point. A
+value of 0 will emit all clusters with their associated probabilities for
+each vector.</li>
+<li>runSequential: a boolean indicating, if true, that the algorithm is to
+use the sequential reference implementation running in memory.</li>
+</ol>
+<p>After running the algorithm, the output directory will contain:</p>
+<ol><li>clusters-N: directories containing SequenceFiles(Text, SoftCluster)
+produced by the algorithm for each iteration. The Text <em>key</em> is a cluster
+identifier string.</li>
+<li>clusteredPoints: (if runClustering enabled) a directory containing
+SequenceFile(IntWritable, WeightedVectorWritable). The IntWritable <em>key</em> is
+the clusterId. The WeightedVectorWritable <em>value</em> is a bean containing a
+double <em>weight</em> and a VectorWritable <em>vector</em> where the weights are
+computed as 1/(1+distance) where the distance is between the cluster center
+and the vector using the chosen DistanceMeasure.</li></ol>
+<p><a name="FuzzyK-Means-Examples"></a></p>
+<h1 id="examples">Examples</h1>
+<p>The following images illustrate Fuzzy k-Means clustering applied to a set
+of randomly-generated 2-d data points. The points are generated using a
+normal distribution centered at a mean location and with a constant
+standard deviation. See the README file in the <a href="https://github.com/apache/mahout/blob/master/examples/src/main/java/org/apache/mahout/clustering/display/README.txt">/examples/src/main/java/org/apache/mahout/clustering/display/README.txt</a>
+ for details on running similar examples.</p>
+<p>The points are generated as follows:</p>
+<ul>
+<li>500 samples m=[1.0, 1.0]
+ sd=3.0</li>
+<li>300 samples m=[1.0, 0.0]
+ sd=0.5</li>
+<li>300 samples m=[0.0, 2.0]
+ sd=0.1</li>
+</ul>
+<p>In the first image, the points are plotted and the 3-sigma boundaries of
+their generator are superimposed. </p>
+<p><img alt="Sample data points with the 3-sigma boundaries of their generators superimposed" src="../../images/SampleData.png" /></p>
+<p>In the second image, the resulting clusters (k=3) are shown superimposed upon the sample data. As Fuzzy k-Means is an iterative algorithm, the centers of the clusters in each recent iteration are shown using different colors. Bold red is the final clustering and previous iterations are shown in orange, yellow, green, blue, violet and gray.
+Although it misses a lot of the points and cannot capture the original,
+superimposed cluster centers, it does a decent job of clustering this data.</p>
+<p><img alt="Fuzzy k-Means clusters (k=3) superimposed on the sample data" src="../../images/FuzzyKMeans.png" /></p>
+<p>The third image shows the results of running Fuzzy k-Means on a different
+data set which is generated using asymmetrical standard deviations.
+Fuzzy k-Means does a fair job handling this data set as well.</p>
+<p><img alt="Fuzzy k-Means clusters on a data set generated with asymmetrical standard deviations" src="../../images/2dFuzzyKMeans.png" /></p>
+<p><a name="FuzzyK-Means-References&nbsp;"></a></p>
+<h4 id="referenceswzxhzdk15">References&nbsp;</h4>
+<ul>
+<li><a href="http://en.wikipedia.org/wiki/Fuzzy_clustering">http://en.wikipedia.org/wiki/Fuzzy_clustering</a></li>
+</ul>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() { // Inject the Google Custom Search script without blocking the page.
+      var cxParam = '012254517474945470291:vhsfv7eokdc';
+      var scheme = (document.location.protocol == 'https:') ? 'https:' : 'http:';
+      var loader = document.createElement('script');
+      loader.type = 'text/javascript';
+      loader.async = true;
+      loader.src = scheme + '//www.google.com/cse/cse.js?cx=' + cxParam;
+      var firstScript = document.getElementsByTagName('script')[0];
+      firstScript.parentNode.insertBefore(loader, firstScript);
+    })();
+  </script>
+</body>
+</html>

Added: websites/staging/mahout/trunk/content/users/mapreduce/clustering/hierarchical-clustering.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/mapreduce/clustering/hierarchical-clustering.html (added)
+++ websites/staging/mahout/trunk/content/users/mapreduce/clustering/hierarchical-clustering.html Thu Mar 19 21:21:45 2015
@@ -0,0 +1,283 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="http://mahout.apache.org/images/favicon.ico">
+  <script type="text/javascript" src="/js/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  // tex2jax.skipTags: elements whose contents the tex2jax preprocessor leaves alone.
+  MathJax.Hub.Config({
+    tex2jax: { skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] }
+  });
+  // Queued callback: append ' has-jax' to the class of each jax source element's parent.
+  MathJax.Hub.Queue(function() {
+    var jaxList = MathJax.Hub.getAllJax();
+    for (var idx = 0; idx < jaxList.length; idx++) {
+      jaxList[idx].SourceElement().parentNode.className += ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    // Load MathJax asynchronously, picking the CDN URL by the page's protocol.
+    var mathjax = document.createElement('script'),
+        s = document.getElementsByTagName('script')[0];
+    mathjax.type = 'text/javascript'; mathjax.async = true;
+    if (document.location.protocol == 'https:') {
+      mathjax.src = 'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+    } else {
+      mathjax.src = 'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+    }
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/overview.html"></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="/">Home</a></li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a></li>
+                  <li><a href="/general/who-we-are.html">Who we are</a></li>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a></li>
+                  <li><a href="/general/release-notes.html">Release Notes</a></li>
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a></li>
+                  <li><a href="/general/professional-support.html">Professional Support</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a></li>
+                  <li><a href="/general/faq.html">FAQ</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a></li>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a></li>
+                  <li><a href="/users/basics/quickstart.html">Quickstart</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a></li>
+                  <li><a href="/users/basics/collocations.html">Collocations</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+                 </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Spark<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+			      <li class="divider"></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+                </ul>
+               </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Classification<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/mapreduce/classification/logistic-regression.html">Logistic Regression</a></li>
+                  <li><a href="/users/mapreduce/classification/partial-implementation.html">Random Forest</a></li>
+
+                  <li class="divider"></li>
+                  <li class="nav-header">Examples</li>
+                  <li><a href="/users/mapreduce/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                </ul></li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Clustering<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                <li><a href="/users/mapreduce/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Commandline usage</li>
+                <li><a href="/users/mapreduce/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Examples</li>
+                <li><a href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Post processing</li>
+                <li><a href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                <li><a href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                </ul></li>
+                <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                <li><a href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		<li><a href="/users/mapreduce/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Hadoop</li>
+                <li><a href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                <li><a href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+                <li class="nav-header">Spark</li>
+                <li><a href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+              </ul>
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<!-- Twitter widgets loader: if no element with id twitter-wjs exists yet, inserts a script element for platform.twitter.com/widgets.js before the first script, using http on http: pages and https otherwise. --><script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Hadoop</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <p>Hierarchical clustering is the process of finding bigger clusters, and also
+the smaller clusters inside the bigger clusters.</p>
+<p>In Apache Mahout, separate algorithms can be used for finding clusters at
+different levels. </p>
+<p>See <a href="https://cwiki.apache.org/confluence/display/MAHOUT/Top+Down+Clustering">Top Down Clustering</a>.
+</p>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    // Asynchronously inject the Google Custom Search script ahead of the first script element.
+    (function(engineId) {
+      var scheme = document.location.protocol == 'https:' ? 'https:' : 'http:';
+      var loader = document.createElement('script');
+      loader.type = 'text/javascript';
+      loader.async = true;
+      loader.src = scheme + '//www.google.com/cse/cse.js?cx=' + engineId;
+      var anchor = document.getElementsByTagName('script')[0];
+      anchor.parentNode.insertBefore(loader, anchor);
+    })('012254517474945470291:vhsfv7eokdc');
+  </script>
+</body>
+</html>



Mime
View raw message