mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1243022 [9/38] - in /mahout/site/new_website: ./ MAHOUT/ MAHOUT/2010/ MAHOUT/2010/09/ MAHOUT/2010/09/14/ MAHOUT/2011/ MAHOUT/2011/10/ MAHOUT/2011/10/21/ MAHOUT/books-tutorials-and-talks.data/ MAHOUT/books-tutorials-talks.data/ MAHOUT/books...
Date Sat, 11 Feb 2012 10:22:31 GMT
Added: mahout/site/new_website/MAHOUT/dirichlet-process-clustering.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/dirichlet-process-clustering.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/dirichlet-process-clustering.html (added)
+++ mahout/site/new_website/MAHOUT/dirichlet-process-clustering.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,277 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; // element with id 'hide'; looked up in init()
+      var show = null; // element with id 'show'; looked up in init()
+      var children = null; // the 'children' element; looked up in init()
+
+      function init() { // page-load hook: wired up via <BODY onload="init()">
+        /* Search form initialization: when a form named 'search' exists, copy the current hostname into its hidden 'domains' and 'sitesearch' fields (presumably to scope the Google search to this site) */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the three elements in the globals above, then start with 'children' collapsed and only the 'show' control visible */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? // prefer the legacy document.all collection when the browser exposes it
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { // NOTE(review): only 'children' is null-checked; 'show' and 'hide' are dereferenced unchecked below
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { // expands the 'children' element and swaps the toggle so only 'hide' is visible
+        children.style.display = 'block'; // no null checks: relies on init() having found all three elements
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { // collapses the 'children' element and swaps the toggle so only 'show' is visible
+        children.style.display = 'none'; // no null checks: relies on init() having found all three elements
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Dirichlet Process Clustering</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Dirichlet Process Clustering">Dirichlet Process Clustering</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Dirichlet Process Clustering</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=101992">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=101992">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=101992">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=101992">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=101992">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=101992">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="DirichletProcessClustering-Overview"></A>Overview</H1>
+
+<P>The Dirichlet Process Clustering algorithm performs Bayesian mixture modeling.</P>
+
+<P>The idea is that we use a probabilistic mixture of a number of models that we use to explain some observed data. Each observed data point is assumed to have come from one of the models in the mixture, but we don't know which.  The way we deal with that is to use a so-called latent parameter which specifies which model each data point came from.</P>
+
+<P>In addition, since this is a Bayesian clustering algorithm, we don't want to actually commit to any single explanation, but rather to sample from the distribution of models and latent assignments of data points to models given the observed data and the prior distributions of model parameters. This sampling process is initialized by taking models at random from the prior distribution for models.</P>
+
+<P>Then, we iteratively assign points to the different models using the mixture probabilities and the degree of fit between the point and each model expressed as a probability that the point was generated by that model. After points are assigned, new parameters for each model are sampled from the posterior distribution for the model parameters considering all of the observed data points that were assigned to the model.  Models without any data points are also sampled, but since they have no points assigned, the new samples are effectively taken from the prior distribution for model parameters.</P>
+
+<P>The result is a number of samples that represent mixing probabilities, models and assignment of points to models. If the total number of possible models is substantially larger than the number that ever have points assigned to them, then this algorithm provides a (nearly) non-parametric clustering algorithm. These samples can give us interesting information that is lacking from a normal clustering that consists of a single assignment of points to clusters.  Firstly, by examining the number of models in each sample that actually have any points assigned to them, we can get information about how many models (clusters) the data support. Moreover, by examining how often two points are assigned to the same model, we can get an approximate measure of how likely these points are to be explained by the same model.  Such soft membership information is difficult to come by with conventional clustering methods.</P>
+
+<P>Finally, we can get an idea of the stability of how the data can be described.  Typically, aspects of the data with lots of data available wind up with stable descriptions, while at the edges there are phenomena for which we can't really commit to a solid description, but it is still clear that the well-supported explanations are insufficient to explain these additional aspects. One thing that can be difficult about these samples is that we can't always assign a correlation between the models in the different samples.  Probably the best way to do this is to look for overlap in the assignments of data observations to the different models.</P>
+
+<H2><A name="DirichletProcessClustering-DesignofImplementation"></A>Design of Implementation</H2>
+
+<P>The implementation accepts one input directory containing the data points to be clustered. The data directory contains multiple input files of SequenceFile(key, VectorWritable). The input data points are not modified by the implementation, allowing experimentation with initial clustering and convergence values.</P>
+
+<P>The program iterates over the input points, outputting a new directory &quot;clusters-N&quot; containing SequenceFile(Text, DirichletCluster) files for each iteration N. This process uses a mapper/reducer/driver as follows:</P>
+
+<P>DirichletMapper - reads the input clusters during its configure() method, then assigns and outputs each input point to a probable cluster as defined by the model's pdf() function. Output <EM>key</EM> is: clusterId. Output <EM>value</EM> is: input point.<BR>
+DirichletReducer - reads the input clusters during its configure() method, then each reducer receives clusterId:VectorWritable pairs from all mappers and accumulates them to produce a new posterior model for each cluster which is output. Output <EM>key</EM> is: clusterId. Output value is: DirichletCluster. Reducer outputs are used as the input clusters for the next iteration.<BR>
+DirichletDriver - iterates over the points and clusters until the given number of iterations has been reached. During iterations, a new clusters directory &quot;clusters-N&quot; is produced with the output clusters from the previous iteration used for input to the next. A final optional pass over the data using the DirichletClusterMapper clusters all points to an output directory &quot;clusteredPoints&quot; and has no combiner or reducer steps.</P>
+
+<H2><A name="DirichletProcessClustering-RunningDirichletProcessClustering"></A>Running Dirichlet Process Clustering</H2>
+
+<P>The Dirichlet clustering algorithm may be run using a command-line invocation on DirichletDriver.main or by making a Java call to DirichletDriver.runJob(). </P>
+
+<P>Invocation using the command line takes the form:</P>
+
+<DIV class="preformatted panel" style="border-width: 1px;"><DIV class="preformattedContent panelContent">
+<PRE>bin/mahout dirichlet \
+    -i &lt;input vectors directory&gt; \
+    -o &lt;output working directory&gt; \
+    -a0 &lt;the alpha_0 parameter to the Dirichlet Distribution&gt; \
+    -x &lt;maximum number of iterations&gt; \
+    -k &lt;number of models to create from prior&gt; \
+    -md &lt;the ModelDistribution class name. Default NormalModelDistribution&gt; \
+    -mp &lt;the ModelPrototype class name. Default SequentialAccessSparseVector&gt; \
+    -dm &lt;optional DistanceMeasure class name for some ModelDistribution&gt; \
+    -ow &lt;overwrite output directory if present&gt; \
+    -cl &lt;run input vector clustering after computing Clusters&gt; \
+    -e &lt;emit vectors to most likely cluster during clustering&gt; \
+    -t &lt;threshold to use for clustering if -e is false&gt; \
+    -xm &lt;execution method: sequential or mapreduce&gt;
+</PRE>
+</DIV></DIV>
+
+<P>Invocation using Java involves supplying the following arguments:</P>
+
+<OL>
+	<LI>input: a file path string to a directory containing the input data set as a SequenceFile(WritableComparable, VectorWritable). The sequence file <EM>key</EM> is not used.</LI>
+	<LI>output: a file path string to an empty directory which is used for all output from the algorithm.</LI>
+	<LI>modelFactory: an instance of ModelDistribution which will be used for the clustering.</LI>
+	<LI>numClusters: the number of models to be used for the clustering. This should be larger than the number of clusters which is expected in the data set.</LI>
+	<LI>maxIterations: the number of iterations to run for the clustering.</LI>
+	<LI>alpha_0: a double value (default is 1.0) used for creating the DirichletDistribution. Influences the likelihood that new, empty clusters will be selected for assignment in the first iteration.</LI>
+	<LI>runClustering: a boolean indicating, if true, that the clustering step is to be executed after clusters have been determined.</LI>
+	<LI>emitMostLikely: a boolean indicating, if true, that the clustering step should only emit the most likely cluster for each clustered point.</LI>
+	<LI>threshold: a double indicating, if emitMostLikely is false, the cluster probability threshold used for emitting multiple clusters for each point. A value of 0 will emit all clusters with their associated probabilities for each vector.</LI>
+	<LI>runSequential: a boolean indicating, if true, that the clustering is to be run using the sequential reference implementation in memory.</LI>
+</OL>
+
+
+<P>After running the algorithm, the output directory will contain:</P>
+<OL>
+	<LI>clusters-N: directories containing SequenceFiles(Text, DirichletCluster) produced by the algorithm for each iteration. The Text <EM>key</EM> is a cluster identifier string.</LI>
+	<LI>clusteredPoints: (if runClustering enabled) a directory containing SequenceFile(IntWritable, WeightedVectorWritable). The IntWritable <EM>key</EM> is the clusterId. The WeightedVectorWritable <EM>value</EM> is a bean containing a double <EM>weight</EM> and a VectorWritable <EM>vector</EM> where the weight indicates the probability that the vector is a member of the cluster.</LI>
+</OL>
+
+
+
+<H1><A name="DirichletProcessClustering-Examples"></A>Examples</H1>
+
+<P>The following images illustrate three different prior models applied to a set of randomly-generated 2-d data points. The points are generated using a normal distribution centered at a mean location and with a constant standard deviation. See the README file in the <A href="http://svn.apache.org/repos/asf/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt" class="external-link" rel="nofollow">/examples/src/main/java/org/apache/mahout/clustering/display/README.txt</A> for details on running similar examples.</P>
+
+<P>The points are generated as follows:</P>
+
+<UL>
+	<LI>500 samples m=[1.0, 1.0] sd=3.0</LI>
+	<LI>300 samples m=[1.0, 0.0] sd=0.5</LI>
+	<LI>300 samples m=[0.0, 2.0] sd=0.1</LI>
+</UL>
+
+
+<P>In the first image, the points are plotted and the 3-sigma boundaries of their generator are superimposed. It is, of course, impossible to tell which model actually generated each point as there is some probability - perhaps small - that any of the models could have generated every point.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/SampleData.png" style="border: 0px solid black"></SPAN></P>
+
+<P>In the next image, the Dirichlet Process Clusterer is run against the sample points using a NormalModelDistribution with m=[0.0, 0.0] sd=1.0. This distribution represents the least amount of prior information, as its sampled models all have constant parameters. The resulting significant models (representing &gt; 5% of the population) are superimposed upon the sample data. Since all prior models are identical and their pdfs are the same, the first iteration's assignment of points to models is completely governed by the initial mixture values. Since these are also identical, it means the first iteration assigns points to models at random. During subsequent iterations, the models diverge from the origin but there is some over-fitting in the result.</P>
+
+<P>As Dirichlet clustering is an iterative process, the following illustrations include the cluster information from all iterations. The final cluster values are in bold red and earlier iterations are shown in [orange, yellow, green, blue, violet and the rest are all gray]. These illustrate the cluster convergence process over the last several iterations and can be helpful in tuning the algorithm.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/DirichletN.png" style="border: 0px solid black"></SPAN></P>
+
+<P>The next image improves upon this situation by using a SampledNormalDistribution. In this distribution, the prior models have means that are sampled from a normal distribution and all have a constant sd=1. This distribution creates initial models that are centered at different coordinates. During the first iteration, each model thus has a different pdf for each point and the iteration assigns points to the more-likely models given this value. The result is a decent capture of the sample data parameters but there is still some over-fitting.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/DirichletSN.png" style="border: 0px solid black"></SPAN></P>
+
+<P>The above image was run through 20 iterations and the cluster assignments are clearly moving indicating the clustering is not yet converged. The next image runs the same model for 40 iterations, producing an accurate model of the input data.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/DirichletSN40.png" style="border: 0px solid black"></SPAN></P>
+
+<P>The next image uses an AsymmetricSampledNormalDistribution in which the model's standard deviation is also represented as a 2-d vector. This causes the clusters to assume elliptical shapes in the resulting clustering. This represents an incorrect prior assumption but it is interesting that it fits the actual sample data quite well. Had we suspected the sample points were generated in a similar manner then this distribution would have been the most logical model.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/DirichletASN.png" style="border: 0px solid black"></SPAN></P>
+
+<P>In order to explore an asymmetrical sample data distribution, the following image shows a number of points generated according to the following parameters. Again, the generator's 3-sigma ellipses are superimposed:</P>
+
+<UL>
+	<LI>500 samples m=[1.0, 1.0] sd=[3.0, 1.0]</LI>
+	<LI>300 samples m=[1.0, 0.0] sd=[0.5, 1.0]</LI>
+	<LI>300 samples m=[0.0, 2.0] sd=[0.1, 0.5]</LI>
+</UL>
+
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/AsymmetricSampleData.png" style="border: 0px solid black"></SPAN></P>
+
+<P>The following image shows the results of applying the symmetrical SampledNormalDistribution to the asymmetrically-generated sample data. It makes a valiant effort but does not capture a very good set of models because the circular model assumption does not fit the data.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/2dDirichletSN.png" style="border: 0px solid black"></SPAN></P>
+
+<P>Finally, the AsymmetricSampledNormalDistribution is run against the asymmetrical sample data. Though there is some over-fitting, it does a credible job of capturing the underlying models. Different arguments (numClusters, alpha0, numIterations) and display thresholds will yield slightly different results. Compare the first run of numClusters=20 models for 20 iterations with another run of numClusters=40 models for 40 iterations.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/2dDirichletASN.png" style="border: 0px solid black"></SPAN><BR>
+<SPAN class="image-wrap" style=""><IMG src="dirichlet-process-clustering.data/2dDirichletASN4040.png" style="border: 0px solid black"></SPAN></P>
+
+<H1><A name="DirichletProcessClustering-References"></A>References</H1>
+
+<P>McCullagh and Yang: <A href="http://ba.stat.cmu.edu/journal/2008/vol03/issue01/yang.pdf" class="external-link" rel="nofollow">http://ba.stat.cmu.edu/journal/2008/vol03/issue01/yang.pdf</A></P>
+
+<P>There is also a more approachable example in <A href="http://research.microsoft.com/en-us/um/people/cmbishop/PRML/index.htm" class="external-link" rel="nofollow">Chris Bishop's book on Machine Learning</A>. I think that chapter 9 is where the example of clustering using a mixture model is found.</P>
+
+<P>The Neal and Blei references from the McCullagh and Yang paper are also good. Zoubin Ghahramani has some very <A href="http://learning.eng.cam.ac.uk/zoubin/talks/uai05tutorial-b.pdf" class="external-link" rel="nofollow">nice tutorials out which describe why non-parametric Bayesian approaches to problems are very cool</A>; there are video versions available as well.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.6 Build: 2036 Dec 21, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; // reuse an existing _gaq if one is already defined, otherwise start an empty queue
+  _gaq.push(['_setAccount', 'UA-17359171-1']); // commands are queued as arrays: command name followed by its arguments
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { // immediately-invoked so 'ga' and 's' do not become globals
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; // build an async <script> element for ga.js
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; // pick the ssl or www host to match the page's protocol
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); // insert the loader ahead of the first <script> in the document
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/display-clustering.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/display-clustering.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/display-clustering.html (added)
+++ mahout/site/new_website/MAHOUT/display-clustering.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,144 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; // element with id 'hide'; looked up in init()
+      var show = null; // element with id 'show'; looked up in init()
+      var children = null; // the 'children' element; looked up in init()
+
+      function init() { // page-load hook: wired up via <BODY onload="init()">
+        /* Search form initialization: when a form named 'search' exists, copy the current hostname into its hidden 'domains' and 'sitesearch' fields (presumably to scope the Google search to this site) */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the three elements in the globals above, then start with 'children' collapsed and only the 'show' control visible */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? // prefer the legacy document.all collection when the browser exposes it
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { // NOTE(review): only 'children' is null-checked; 'show' and 'hide' are dereferenced unchecked below
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { // expands the 'children' element and swaps the toggle so only 'hide' is visible
+        children.style.display = 'block'; // no null checks: relies on init() having found all three elements
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { // collapses the 'children' element and swaps the toggle so only 'show' is visible
+        children.style.display = 'none'; // no null checks: relies on init() having found all three elements
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Display Clustering</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="quickstart.html" title="Quickstart">Quickstart</A>&nbsp;&gt;&nbsp;<A href="" title="Display Clustering">Display Clustering</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Display Clustering</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=23335706">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=23335706">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=23335706">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=23335706">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=23335706">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=23335706">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; // reuse an existing _gaq if one is already defined, otherwise start an empty queue
+  _gaq.push(['_setAccount', 'UA-17359171-1']); // commands are queued as arrays: command name followed by its arguments
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { // immediately-invoked so 'ga' and 's' do not become globals
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; // build an async <script> element for ga.js
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; // pick the ssl or www host to match the page's protocol
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); // insert the loader ahead of the first <script> in the document
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/downloads.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/downloads.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/downloads.html (added)
+++ mahout/site/new_website/MAHOUT/downloads.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,189 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() { /* Page setup; wired to the BODY onload attribute of this page. */
+        /* Search form initialization: copy the current hostname into the 'domains' and 'sitesearch' fields of the 'search' form, if that form exists. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: store the 'hide', 'show' and 'children' elements in the script-level variables, then start with 'children' hidden. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?   /* use document.all where the browser provides it, getElementById otherwise */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {   /* NOTE(review): show and hide are not null-checked here - confirm both exist whenever 'children' does */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Display 'children' as a block and swap the toggles: 'show' hidden, 'hide' inline. Assumes init() has already populated children/show/hide - TODO confirm callers. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Hide 'children' and swap the toggles back: 'show' inline, 'hide' hidden. Assumes init() has already populated children/show/hide - TODO confirm callers. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Downloads</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="Downloads">Downloads</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Downloads</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=22872410">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=22872410">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=22872410">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=22872410">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=22872410">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=22872410">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Apache Mahout is an official Apache project and thus available from any of the Apache mirrors.</P>
+
+<H2><A name="Downloads-OfficialRelease"></A>Official Release</H2>
+
+<P>The latest Mahout release is available for download at: <B><A href="http://www.apache.org/dyn/closer.cgi/mahout/" class="external-link" rel="nofollow">http://www.apache.org/dyn/closer.cgi/mahout/</A></B></P>
+
+<P><A href="system-requirements.html" title="System Requirements">System Requirements</A> are detailed online. </P>
+
+<H2><A name="Downloads-SnapshotReleases"></A>Snapshot Releases</H2>
+
+<P>Recent builds are available on the Apache Jenkins <A href="https://builds.apache.org/job/Mahout-Quality/changes" class="external-link" rel="nofollow">build server</A><BR>
+To get the latest, navigate to &quot;Build History&quot;, in the lower left-hand corner.<BR>
+Pick the latest build which has a yellow ball instead of a red ball.<BR>
+Go to &quot;Build Artifacts&quot; page.<BR>
+Download &quot;all files in trunk&quot; zip file</P>
+
+<P>Now, how you go from there to running examples: you don't. These build<BR>
+artifacts do not include the shell scripts and other material needed<BR>
+to run the Mahout examples: you are only able to use the jars in your<BR>
+own projects.</P>
+
+<P><A href="https://issues.apache.org/jira/browse/MAHOUT-935" class="external-link" rel="nofollow">MAHOUT-935</A> is filed as a marker to make the builds more useable. You are welcome and encouraged to fix this problem!</P>
+
+<H2><A name="Downloads-FutureReleases"></A>Future Releases</H2>
+
+<P>Official releases are usually created when the developers feel there are sufficient changes, improvements and bug fixes to warrant a release. Watch the <A href="https://cwiki.apache.org/MAHOUT/mailinglistarchives.html" class="external-link" rel="nofollow">Mailing lists</A> for latest release discussions.</P>
+
+<H2><A name="Downloads-Backwardscompatibilityofreleases"></A>Backwards compatibility of releases</H2>
+
+<P><EM>Please note that the backwards compatibility contract below is a draft. As soon as it is finished, the line you are about to read will be deleted.</EM></P>
+
+<P>As of version 1.0 Apache Mahout will provide the following backwards compatibility guarantees.</P>
+
+<H3><A name="Downloads-Datainputformats%2CModelformats%2CModeloutput"></A>Data input formats, Model formats, Model output</H3>
+
+<P>All minor versions within a major release can read data written by prior versions within the same major release.  That is, 3.4 can read a 3.3 data file.  However, 3.3 cannot read a 3.4 data file.  When a user reads 3.3 data with 3.4, it is silently upgraded to 3.4.</P>
+
+<P>A major TODO that you can help with if you are interested is to provide serialization methods based on <A href="http://avro.apache.org/" class="external-link" rel="nofollow">Apache Avro</A> that would make support for input, output and model format upgrade easier.</P>
+
+<P>Another option is to use the <A href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/SequenceFile.Metadata.html" class="external-link" rel="nofollow">SequenceFile.Metadata</A> feature to store file format versions.</P>
+
+<H3><A name="Downloads-APIsandcommandlinescripts"></A>APIs and command line scripts</H3>
+
+<P>For APIs, we typically mark things as @mahout.experimental if we think they may change within minor releases.  We also mark things as deprecated that are going away.  Deprecated items are then removed on the next major release.  The upgrade path is usually to go to x.9, remove all deprecations and then go to x+1.0.</P>
+
+<P>We also communicate to users via release notes when we have purposefully broken backwards compatibility.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];  // keep an already-defined _gaq if there is one, otherwise start with an empty array
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+  // The function below creates an async script element for ga.js and inserts it before the first script element in the document.
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';  // 'https://ssl' prefix on https pages, 'http://www' otherwise
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/expectation-maximization.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/expectation-maximization.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/expectation-maximization.html (added)
+++ mahout/site/new_website/MAHOUT/expectation-maximization.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,163 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() { /* Page setup; wired to the BODY onload attribute of this page. */
+        /* Search form initialization: copy the current hostname into the 'domains' and 'sitesearch' fields of the 'search' form, if that form exists. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: store the 'hide', 'show' and 'children' elements in the script-level variables, then start with 'children' hidden. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?   /* use document.all where the browser provides it, getElementById otherwise */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {   /* NOTE(review): show and hide are not null-checked here - confirm both exist whenever 'children' does */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Display 'children' as a block and swap the toggles: 'show' hidden, 'hide' inline. Assumes init() has already populated children/show/hide - TODO confirm callers. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Hide 'children' and swap the toggles back: 'show' inline, 'hide' hidden. Assumes init() has already populated children/show/hide - TODO confirm callers. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Expectation Maximization</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Expectation Maximization">Expectation Maximization</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Expectation Maximization</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75686">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75686">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75686">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75686">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75686">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75686">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="ExpectationMaximization-ExpectationMaximization"></A>Expectation Maximization</H1>
+
+<P>The principle of EM can be applied to several learning settings, but is most commonly associated with clustering. The main principle of the algorithm is comparable to k-Means. Yet in contrast to hard cluster assignments, each object is given some probability to belong to a cluster. Accordingly cluster centers are recomputed based on the average of all objects weighted by their probability of belonging to the cluster at hand.</P>
+
+<H2><A name="ExpectationMaximization-CanopymodifiedEM"></A>Canopy-modified EM</H2>
+
+<P>One can also use the canopies idea to speed up prototype-based clustering methods like K-means and Expectation-Maximization (EM). In general, neither K-means nor EM specify how many clusters to use. The canopies technique does not help this choice.</P>
+
+<P>Prototypes (our estimates of the cluster centroids) are associated with the canopies that contain them, and the prototypes are only influenced by data that are inside their associated canopies. After creating the canopies, we decide how many prototypes will be created for each canopy. This could be done, for example, using the number of data points in a canopy and AIC or BIC where points that occur in more than one canopy are counted fractionally. Then we place prototypes into each canopy. This initial placement can be random, as long as it is within the canopy in question, as determined by the inexpensive distance metric.</P>
+
+<P>Then, instead of calculating the distance from each prototype to every point (as is traditional, an O(nk) operation), the E-step instead calculates the distance from each prototype to a much smaller number of points. For each prototype, we find the canopies that contain it (using the cheap distance metric), and only calculate distances (using the expensive distance metric) from that prototype to points within those canopies.</P>
+
+<P>Note that by this procedure prototypes may move across canopy boundaries when canopies overlap. Prototypes may move to cover the data in the overlapping region, and then move entirely into another canopy in order to cover data there.</P>
+
+<P>The canopy-modified EM algorithm behaves very similarly to traditional EM, with the slight difference that points outside the canopy have no influence on points in the canopy, rather than a minute influence. If the canopy property holds, and points in the same cluster fall in the same canopy, then the canopy-modified EM will almost always converge to the same maximum in likelihood as the traditional EM. In fact, the difference in each iterative step (apart from the enormous computational savings of computing fewer terms) will be negligible since points outside the canopy will have exponentially small influence.</P>
+
+<H2><A name="ExpectationMaximization-StrategyforParallelization"></A>Strategy for Parallelization</H2>
+
+<H2><A name="ExpectationMaximization-Map%2FReduceImplementation"></A>Map/Reduce Implementation</H2>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];  // keep an already-defined _gaq if there is one, otherwise start with an empty array
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+  // The function below creates an async script element for ga.js and inserts it before the first script element in the document.
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';  // 'https://ssl' prefix on https pages, 'http://www' otherwise
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/faq.data/Mahout Overview.ppt
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/faq.data/Mahout%20Overview.ppt?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/faq.data/Mahout Overview.ppt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.svg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.svg?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.svg (added)
+++ mahout/site/new_website/MAHOUT/faq.data/Mahout-logo.svg Sat Feb 11 10:22:15 2012
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://web.resource.org/cc/"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:xlink="http://www.w3.org/1999/xlink"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   id="svg2225"
+   sodipodi:version="0.32"
+   inkscape:version="0.45.1"
+   width="294"
+   height="312"
+   version="1.0"
+   sodipodi:docbase="D:\Grafika\mahout\final\public"
+   sodipodi:docname="Mahout-logo.svg"
+   inkscape:output_extension="org.inkscape.output.svg.inkscape"
+   inkscape:export-filename="D:\Grafika\mahout\final\Mahout-logo.png"
+   inkscape:export-xdpi="90"
+   inkscape:export-ydpi="90">
+  <metadata
+     id="metadata2230">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title>Mahout</dc:title>
+        <dc:creator>
+          <cc:Agent>
+            <dc:title>Lukas Vlcek</dc:title>
+          </cc:Agent>
+        </dc:creator>
+        <cc:license
+           rdf:resource="" />
+        <dc:description>Mahout project logo</dc:description>
+        <dc:date>2008-05-12</dc:date>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs2228">
+    <linearGradient
+       id="linearGradient30064">
+      <stop
+         style="stop-color:#ffffff;stop-opacity:1;"
+         offset="0"
+         id="stop30066" />
+      <stop
+         style="stop-color:#ffffff;stop-opacity:0;"
+         offset="1"
+         id="stop30068" />
+    </linearGradient>
+    <linearGradient
+       id="linearGradient21032">
+      <stop
+         style="stop-color:#9d9e3a;stop-opacity:1;"
+         offset="0"
+         id="stop21034" />
+      <stop
+         style="stop-color:#9c9e3a;stop-opacity:0;"
+         offset="1"
+         id="stop21036" />
+    </linearGradient>
+    <radialGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient30064"
+       id="radialGradient33970"
+       cx="-70.286964"
+       cy="166.3316"
+       fx="-70.286964"
+       fy="166.3316"
+       r="19.129021"
+       gradientUnits="userSpaceOnUse" />
+    <radialGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient30064"
+       id="radialGradient35919"
+       gradientUnits="userSpaceOnUse"
+       cx="-70.286964"
+       cy="166.3316"
+       fx="-70.286964"
+       fy="166.3316"
+       r="19.129021" />
+    <radialGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient30064"
+       id="radialGradient35945"
+       gradientUnits="userSpaceOnUse"
+       cx="-70.286964"
+       cy="166.3316"
+       fx="-70.286964"
+       fy="166.3316"
+       r="19.129021" />
+    <radialGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient30064"
+       id="radialGradient35964"
+       gradientUnits="userSpaceOnUse"
+       cx="-70.286964"
+       cy="166.3316"
+       fx="-70.286964"
+       fy="166.3316"
+       r="19.129021" />
+  </defs>
+  <sodipodi:namedview
+     inkscape:window-height="808"
+     inkscape:window-width="1152"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     guidetolerance="10.0"
+     gridtolerance="10.0"
+     objecttolerance="10.0"
+     borderopacity="1.0"
+     bordercolor="#666666"
+     pagecolor="#ffffff"
+     id="base"
+     inkscape:zoom="1.3870171"
+     inkscape:cx="147"
+     inkscape:cy="129.15736"
+     inkscape:window-x="0"
+     inkscape:window-y="22"
+     inkscape:current-layer="layer4"
+     showguides="true"
+     inkscape:guide-bbox="true"
+     inkscape:document-units="pt" />
+  <g
+     inkscape:groupmode="layer"
+     id="layer4"
+     inkscape:label="mahout"
+     style="display:inline"
+     sodipodi:insensitive="true">
+    <rect
+       style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       id="rect11180"
+       width="163.66051"
+       height="191.05753"
+       x="69.93425"
+       y="59.659931" />
+    <path
+       style="fill:#1ba80d;fill-opacity:1;stroke:none;display:inline"
+       d="M 77.419318,72.579299 L 77.419318,226.2332 L 226.54943,226.2332 L 226.54943,72.579299 L 77.419318,72.579299 z "
+       id="rect9147"
+       sodipodi:nodetypes="ccccc" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 163.67524,103.28975 C 161.59787,103.95918 157.47539,107.16074 155.99324,108.76502 C 154.13477,107.36909 152.82662,106.32308 151.34268,105.74651 C 149.6138,105.07477 147.83067,104.77213 145.11926,104.86659 C 142.23987,104.9669 138.41354,106.30098 136.45681,107.39177 C 134.85459,108.28494 131.53008,111.35107 131.8981,116.93846 C 132.08176,119.72667 132.7189,121.38634 133.10491,122.13298 C 133.18664,122.29106 124.58498,123.64251 119.5044,124.69454 C 114.3952,125.7525 105.96714,127.35326 103.20649,127.91475 C 101.38348,128.28553 99.15978,128.916 97.86701,130.9212 C 96.56827,132.93565 96.602,138.60325 97.59699,143.25039 C 98.60662,147.9659 100.1645,151.86008 103.33101,156.0958 C 106.51468,160.35448 110.88726,164.3983 116.02249,165.83531 C 121.15103,167.27046 126.16165,167.63177 130.19757,167.53211 C 134.23349,167.43245 137.24077,167.19316 139.40033,166.81176 C 140.30345,165.16636 140.90363,163.34653 141.43712,161.74975 C 141.87864,160.42824 142.34438,159.20949 142.992
 45,157.78501 C 140.47409,155.50535 138.68337,152.65397 137.69107,150.94671 C 136.69876,149.23945 136.45195,147.08572 136.49643,145.16183 C 136.53839,143.3469 136.98655,142.07389 137.86274,140.696 C 138.69231,139.39144 139.54429,141.47033 140.30024,142.52813 C 141.05619,143.58592 141.42799,145.88042 142.70515,148.35556 C 143.97295,150.81257 144.6742,151.74219 146.88193,153.64205 C 149.0838,155.53686 152.38544,157.19715 153.86274,157.8835 C 153.83596,158.67181 153.67489,160.96504 153.58149,162.446 C 153.48374,163.99617 153.79563,165.47459 154.11274,166.97725 C 154.50601,168.84081 155.40523,170.38765 156.24013,172.03262 C 157.09908,173.72498 158.39565,176.2072 158.82399,179.00638 C 159.24836,181.77961 159.49918,187.78682 159.49918,190.53366 C 159.49918,193.70646 159.67495,196.83898 160.79651,200.75314 C 161.94274,204.75341 163.9273,207.69164 166.27674,209.96654 C 168.61596,212.23153 171.1352,213.90113 173.65915,214.59044 C 176.21168,215.28756 179.57182,215.8049 183.04224,214.93
 843 C 184.89584,214.47564 189.11523,212.26983 191.21725,211.15483 C 193.30197,210.04899 195.56483,208.65996 196.98774,208.72725 C 197.77214,208.76435 198.55189,208.89661 199.13945,209.12616 C 199.10248,210.803 199.00634,213.20251 199.97136,214.34784 C 201.02306,215.59604 202.54211,215.72183 203.65334,215.69002 C 205.25103,215.64428 206.33368,215.00613 207.49228,213.9339 C 208.51487,212.98752 209.1145,210.33478 208.57526,207.69447 C 208.03448,205.04663 204.62208,202.10562 204.50968,202.02108 C 204.78502,201.98135 206.51649,199.26715 206.99488,197.20932 C 207.4756,195.14149 206.62803,193.61526 205.34653,192.40466 C 204.08165,191.20976 201.07763,191.08428 199.41159,191.71189 C 198.01185,192.23918 196.89966,193.56555 196.39174,194.81058 C 195.97197,195.83954 196.09789,198.11196 196.55024,200.2585 C 195.04693,200.37252 188.80285,200.38211 187.86274,200.5085 C 187.6798,200.53309 187.11199,200.58877 186.62142,200.68348 C 186.66147,201.70312 186.45037,203.26883 185.93925,205.07869 C
  185.4309,206.87874 184.43213,208.3355 183.70649,208.72725 C 182.98644,209.11598 182.17653,209.1511 181.39399,208.7585 C 180.60333,208.36182 180.11834,208.15015 179.92524,206.8835 C 179.73214,205.61685 180.3062,204.58774 180.73774,203.85225 C 181.16928,203.11676 182.28258,202.34443 182.92524,201.72725 C 183.64506,201.03597 183.74608,200.68686 183.67524,199.946 C 183.55359,198.67372 182.58653,198.05815 181.44367,198.17354 C 180.31641,198.28735 178.71901,199.3563 177.83737,199.92307 C 176.95654,200.48931 175.1975,201.58741 174.30024,201.946 C 173.39054,202.30956 172.60987,202.47642 171.6941,202.07313 C 170.7487,201.65679 170.69501,200.57187 170.74958,199.43165 C 170.80621,198.2482 171.63974,197.57751 172.48774,197.1335 C 173.32817,196.69345 174.40138,196.57268 175.36274,196.321 C 176.28801,196.07877 177.04685,195.81527 178.05024,194.946 C 179.06889,194.06351 179.27106,193.53724 179.39399,192.60225 C 179.52158,191.63179 178.4866,190.61673 177.49196,190.36196 C 176.50089,190.108
 09 175.11129,190.45552 174.11274,190.72725 C 173.15763,190.98716 172.21458,191.4325 171.26899,191.3835 C 170.07904,191.32184 169.58753,190.99508 168.92524,190.16475 C 168.25092,189.31818 168.36285,187.26174 169.08149,186.41475 C 169.80136,185.56632 170.52868,185.34304 171.86274,185.03975 C 173.19027,184.73795 174.59599,184.98696 175.86274,185.03975 C 176.61895,185.07124 178.67459,185.466 178.73774,185.47725 C 178.64387,185.49944 179.45408,182.19947 179.98774,180.28975 C 180.7009,177.73764 182.15129,175.48583 182.36274,172.821 C 183.05334,164.11778 180.25872,162.40488 178.89399,159.0085 C 177.49606,155.52949 177.60072,152.42706 177.70649,150.60225 C 177.81132,148.7937 179.29863,145.53441 180.17658,142.19787 C 181.06648,138.81595 181.32032,136.04759 181.36274,134.35225 C 181.41599,132.22387 181.46445,131.18638 182.36274,131.03975 C 183.24113,130.89636 183.74425,132.06594 183.80024,132.1335 C 184.28454,132.7178 184.4213,132.7147 184.73774,133.97725 C 185.05117,135.22778 185.017
 25,137.43881 185.02424,139.01233 C 185.03123,140.46586 184.55425,142.52692 184.02761,144.19723 C 183.50577,145.85232 181.45595,148.74374 181.63838,152.15385 C 183.47843,152.45064 186.66574,153.25146 189.17179,154.42936 C 191.67783,155.60726 193.49151,157.21149 194.40138,158.49984 C 196.50874,157.8606 199.77477,154.06911 202.50505,150.78553 C 205.40526,147.29244 207.33034,144.22559 208.81015,141.13491 C 210.34467,137.92999 211.22934,136.1286 212.67524,131.196 C 214.13135,126.24042 214.32582,121.20539 213.98774,118.321 C 213.64966,115.41619 212.17563,113.093 209.71218,112.35419 C 207.24447,111.6141 203.77312,112.28304 201.17524,112.8835 C 198.57736,113.48396 195.36437,114.51561 191.16361,115.91238 C 186.96978,117.30685 182.65541,118.85389 180.79688,119.41352 C 180.08791,117.49962 179.29854,113.27507 178.2292,110.44709 C 177.15986,107.61912 176.03424,105.77405 174.77649,104.76783 C 173.1454,103.46292 171.20563,102.98555 169.45649,102.78975 C 166.68925,102.47999 165.25342,102.78
 118 163.67524,103.28975 z M 172.26899,130.97725 C 173.51248,131.66514 173.71306,133.87111 173.49143,135.53385 C 173.35155,136.58329 173.09202,137.41826 172.7792,138.41192 C 172.44954,139.45909 171.78625,140.32534 171.13472,140.31106 C 168.61902,140.25593 169.70359,135.7797 170.08149,133.2585 C 170.33914,131.53954 171.25737,130.90298 172.26899,130.97725 z M 153.73774,133.7585 C 154.46696,133.65883 154.82371,133.80181 155.39395,134.4496 C 155.96418,135.09739 156.73774,137.84714 156.73774,138.72725 C 156.73774,139.73508 156.67198,140.63056 156.55024,141.60225 C 156.43123,142.55225 156.08851,143.46635 155.26899,143.696 C 154.11013,144.02075 153.61446,142.88069 153.36274,142.53975 C 152.53787,141.42249 152.61243,139.36262 152.51899,138.03975 C 152.42231,136.67103 152.3646,134.27932 153.73774,133.7585 z "
+       id="path4234"
+       sodipodi:nodetypes="ccssssszszzzzzcsczsszzzcssszsszzszscssszczzsscsczzzzzsszszzzzzzzzsszzzsssszzzszszzzczcsszzzzzczssscsssscczsssssc" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 146.12219,160.5071 C 147.57743,160.98509 148.9971,161.63065 150.21048,162.57324 C 151.11418,163.33647 151.17161,164.45413 150.87777,165.51324 C 150.67564,167.61389 151.20593,169.61274 151.20212,171.71507 C 151.03179,173.17501 149.1551,176.75232 147.37213,179.48409 C 145.58159,182.22746 143.63017,184.04479 141.75409,185.48308 C 140.8278,182.97758 140.60113,179.33765 140.66903,177.57786 C 140.73692,175.79255 141.52194,171.37988 142.37806,168.76052 C 143.23742,166.13129 145.1476,161.78605 146.12219,160.5071 z "
+       id="path6206"
+       sodipodi:nodetypes="cccczczzc" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 182.39865,157.12785 C 182.41759,157.67108 182.30234,158.50331 182.54857,159.32223 C 183.00854,160.85209 183.93391,161.60777 184.52303,163.40228 C 185.11212,165.19666 185.56684,166.42095 185.69066,168.5953 C 187.82245,170.20606 190.43148,172.57324 192.20453,173.48601 C 193.99374,174.40711 198.53102,175.40712 198.92909,175.62711 C 199.23641,175.79695 197.57742,171.63623 196.37251,169.4926 C 195.1676,167.34897 192.71102,163.55802 191.19435,161.58507 C 189.66906,159.6009 188.07247,158.28643 186.30112,157.56636 C 184.59376,156.8723 183.17967,156.50446 182.39865,157.12785 z "
+       id="path6208"
+       sodipodi:nodetypes="cszczszzzc" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 137.64481,172.98943 C 137.40625,174.28567 136.7458,177.80453 137.30562,181.70091 C 137.86514,185.59523 139.34571,188.33073 141.12821,190.83239 C 143.86507,189.15941 146.44839,186.99129 148.51883,184.43378 C 150.58121,181.88622 152.89575,178.32556 154.12314,174.44115 C 154.44771,177.23176 155.19824,182.77356 155.33018,186.56909 C 155.46253,190.37647 155.68531,193.16919 155.64483,194.90949 C 155.60435,196.66001 155.56382,199.89547 154.13894,200.99365 C 152.71406,202.09183 151.58015,202.41404 149.42649,203.19775 C 147.27283,203.98146 143.49573,204.3672 140.53807,204.42927 C 137.34621,204.49611 133.7784,203.91388 131.68369,202.62981 C 129.58898,201.34574 129.02929,198.86936 128.66795,195.24324 C 128.30642,191.61516 128.54092,188.08556 129.2206,184.20222 C 129.90027,180.31888 130.50102,176.0831 131.9603,172.72422 L 137.64481,172.98943 z "
+       id="path6210"
+       sodipodi:nodetypes="czczczzzzszzzcc" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 185.78754,171.9157 C 186.07351,171.87572 185.54793,176.85422 184.6899,178.87242 C 183.83125,180.89211 183.19575,183.07996 182.95198,184.482 C 182.71553,185.84194 182.17854,189.20248 182.21785,190.29355 C 182.26001,191.46346 182.53143,194.42829 183.24185,195.29019 C 183.72599,195.87757 186.77188,195.71417 187.34922,195.6673 C 189.21035,195.51733 190.6623,194.86361 191.19771,194.53657 C 191.87004,194.1259 192.28041,193.50167 192.31305,192.73126 C 192.40857,190.45129 192.09425,188.69477 191.57998,185.60478 C 191.06971,182.53881 190.19268,180.67022 189.18665,177.94604 C 188.17406,175.20409 187.3604,173.57761 185.78754,171.9157 z "
+       id="path6212"
+       sodipodi:nodetypes="czzssssszzc" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 100.44928,158.49977 C 100.11595,157.7163 100.02536,162.19464 99.89907,166.14502 C 99.77199,170.12009 99.89024,173.57637 100.17308,175.49993 C 100.78831,177.73767 101.24724,179.4407 103.10773,180.4749 C 104.94394,181.49559 108.34798,182.09767 111.2919,182.07732 C 114.22561,182.05698 116.74337,181.63214 118.22675,181.09452 C 119.69209,180.56344 120.7611,179.90521 121.37485,178.81865 C 121.9886,177.7321 122.39744,176.58249 122.23521,174.78276 C 122.07595,173.01595 122.03772,171.5291 121.55359,170.04681 C 120.26506,170.13804 115.50338,169.63826 113.91024,168.87555 C 110.81953,167.71117 107.92003,165.97098 105.57836,163.62774 C 104.4549,162.63428 103.325,161.64036 102.44964,160.40969 C 101.86847,159.69372 101.25326,158.97352 100.44928,158.49977 z "
+       id="path6214"
+       sodipodi:nodetypes="czczzzzzccccc" />
+    <path
+       sodipodi:nodetypes="cczzczzscczzzzcccccc"
+       id="path8166"
+       d="M 103.08012,124.10065 C 111.46685,123.01838 119.81033,120.98916 127.66152,119.19761 C 127.24902,115.72887 128.40169,110.62222 132.14264,106.67801 C 135.88358,102.7338 140.27088,101.37465 144.50701,101.35735 C 148.7768,101.33991 152.77191,102.51225 155.45156,103.95372 C 157.33694,102.44194 160.72372,99.171893 165.36306,98.506574 C 169.9862,97.843574 174.33291,100.14602 176.32859,101.8204 C 178.34557,103.51264 179.7109,106.02299 180.57822,108.70387 C 181.38327,111.19226 182.07727,113.707 182.57263,116.10776 C 183.61952,115.66607 183.37074,116.10327 184.33376,115.72016 C 183.51997,107.41291 181.33879,102.15853 179.98907,99.799443 C 177.28962,95.081237 172.38867,90.078495 164.25697,87.486614 C 156.12527,84.894734 143.55771,84.474689 132.99362,87.671051 C 122.25815,90.919269 115.6473,95.209539 112.32384,99.210713 C 108.87134,103.36723 105.97335,108.1865 104.70273,112.2177 C 98.13133,112.70705 93.253842,108.90339 92.420012,103.36663 C 92.085309,99.012163 97.45215,93.0901
 05 91.085808,91.460304 C 84.921648,91.913656 88.545908,99.921263 89.532576,103.42158 C 89.946729,110.63701 96.88027,115.64612 103.67642,116.05859 C 102.90211,119.91686 103.16748,121.56538 103.08012,124.10065 z "
+       style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline" />
+    <path
+       style="fill:#005300;fill-opacity:1;fill-rule:evenodd;stroke:#ffffff;stroke-width:1.46393025;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline"
+       d="M 141.3828,100.92817 C 142.16626,95.062451 143.11421,89.411721 145.32058,86.001921 C 147.62729,82.43707 151.31136,77.835319 156.09471,77.903109 C 160.87507,77.970856 162.20867,81.541125 163.16876,85.881051 C 164.18028,90.453437 166.71915,91.632782 167.25829,97.644742 C 162.48477,97.701389 159.2831,99.354882 155.23636,102.01971 C 150.87266,100.99432 146.34683,100.21089 141.3828,100.92817 z "
+       id="path3218"
+       sodipodi:nodetypes="cszzccc" />
+    <path
+       sodipodi:type="arc"
+       style="fill:#005300;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline"
+       id="path5250"
+       sodipodi:cx="160.83333"
+       sodipodi:cy="71.769928"
+       sodipodi:rx="9.5715446"
+       sodipodi:ry="9.5715446"
+       d="M 170.40487 71.769928 A 9.5715446 9.5715446 0 1 1  151.26178,71.769928 A 9.5715446 9.5715446 0 1 1  170.40487 71.769928 z"
+       transform="matrix(0.2025764,0.7188471,-0.747267,0.4427392,179.82392,-77.975063)" />
+    <path
+       style="fill:#ff35cc;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline"
+       d="M 143.68503,105.19188 C 149.25201,108.27272 155.88122,112.10404 162.18614,122.02177 C 164.59259,114.81381 164.55464,106.20425 167.7937,102.17679 C 162.97025,102.85026 160.22988,103.91305 157.55824,107.22102 C 154.02025,105.11791 147.82861,103.36472 143.68503,105.19188 z "
+       id="path5254"
+       sodipodi:nodetypes="ccccc" />
+    <path
+       transform="scale(0.9931536,1.0068936)"
+       style="font-size:23.15151978px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:100%;writing-mode:lr-tb;text-anchor:start;fill:#005300;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:ReservoirGrunge"
+       d="M 77.779074,244.81677 L 78.080055,245.11775 C 78.959446,245.11775 79.399142,245.11775 79.399143,245.11775 L 80.718938,245.14107 C 82.416957,245.14107 83.752058,245.0869 84.724247,244.97856 L 84.863433,244.81677 C 84.971289,242.00762 85.025221,240.27121 85.025228,239.60754 L 85.025228,236.87539 C 85.025221,236.24282 85.117776,235.27818 85.302894,233.98145 L 85.44208,233.98145 C 85.581023,234.16704 85.696658,234.42186 85.788986,234.74591 C 85.943471,235.28618 86.020718,235.55631 86.020727,235.5563 L 89.076462,243.82127 C 89.323735,244.46939 89.586328,244.84739 89.864241,244.95525 C 89.910388,244.97079 90.419559,245.00164 91.391755,245.0478 C 92.302691,245.09444 92.935504,245.11775 93.290197,245.11775 C 93.92323,245.11775 94.586895,245.07136 95.281193,244.97856 C 95.512917,244.82407 95.705799,244.53086 95.85984,244.09894 L 98.75378,236.43593 C 98.831006,236.29699 98.931333,236.08856 99.054761,235.81065 C 99.224777,235.23979 99.34842,234.86933 99.425689,234.69928 C 99.
 641864,234.23628 99.857826,233.94307 100.07357,233.81966 C 100.07355,234.03587 100.09675,234.35616 100.14317,234.78053 C 100.18954,235.20493 100.21274,235.52523 100.21276,235.74141 C 100.21274,236.00378 100.16634,236.18889 100.07357,236.29674 C 100.07355,237.19216 100.07355,237.63986 100.07357,237.63985 C 100.07355,237.82497 100.06578,238.07956 100.05026,238.40361 C 100.05024,238.66598 100.05024,238.85109 100.05026,238.95894 C 100.05024,241.10444 100.1044,243.01065 100.21276,244.67758 L 100.49043,244.97856 L 106.71847,244.97856 C 106.99635,244.97856 107.18923,244.87824 107.29712,244.67758 C 107.29709,243.68986 107.32017,242.21981 107.36636,240.26743 C 107.41249,238.31507 107.43557,236.84502 107.4356,235.85728 C 107.43557,235.78005 107.46265,235.66041 107.51685,235.49837 C 107.57099,235.33635 107.59807,235.21671 107.5981,235.13945 L 107.5981,227.45313 C 107.59807,226.68115 107.30486,226.29515 106.71847,226.29513 L 102.50474,226.29513 C 101.62485,226.29515 99.510214,226.34932 
 96.160821,226.45763 L 95.85984,226.59611 C 95.582392,227.07468 95.38951,227.46068 95.281193,227.75411 C 95.003275,228.5412 94.586659,229.71427 94.031345,231.27333 C 93.336576,233.17155 92.842713,234.12065 92.549755,234.12064 C 92.441406,233.91999 92.294684,233.62679 92.109588,233.24101 L 89.933481,227.17546 C 89.763431,226.69693 89.570549,226.45765 89.354834,226.45763 C 89.045834,226.45765 88.586591,226.43057 87.977104,226.37638 C 87.367595,226.32223 86.908353,226.29515 86.599374,226.29513 C 86.800018,226.29515 84.924894,226.33377 80.973995,226.411 C 80.541597,226.41102 79.769833,226.42656 78.658702,226.45763 C 78.33464,226.47319 78.141758,226.56575 78.080055,226.7353 C 77.97172,226.99767 77.917553,227.62271 77.917554,228.61042 L 77.917554,235.99576 L 77.779074,236.29674 L 77.779074,244.81677 z M 108.35267,245.11775 C 111.71716,245.24116 113.71592,245.30286 114.34898,245.30286 C 114.93539,245.30286 115.35201,245.19476 115.59883,244.97856 C 115.89227,244.16041 116.17794,243.6
 7432 116.45585,243.52029 C 117.0578,243.52029 117.96451,243.49321 119.17598,243.43904 C 120.38743,243.38487 121.29414,243.35779 121.89612,243.35779 C 123.33177,243.35779 124.14215,243.45812 124.32728,243.65877 C 124.38897,243.76711 124.5663,244.04501 124.8593,244.49247 C 125.09102,244.86269 125.3303,245.07112 125.57713,245.11775 C 125.8861,245.11775 126.42636,245.12552 127.1979,245.14107 L 132.24464,245.27955 C 132.72317,245.27955 132.96244,245.12529 132.96247,244.81677 L 128.47107,234.12064 C 128.17807,233.47253 127.66113,232.26083 126.92024,230.48555 C 126.34887,229.06591 125.80862,227.86223 125.29946,226.87448 C 125.16049,226.5966 124.91345,226.45765 124.55831,226.45763 L 121.66437,226.43432 C 119.96681,226.41879 118.23064,226.47272 116.45585,226.59611 C 116.31689,226.73508 116.17794,226.8818 116.039,227.03628 L 108.35267,243.82127 C 108.16756,244.22258 108.07501,244.5311 108.07501,244.74682 L 108.07501,244.81677 C 108.12117,244.87847 108.21372,244.9788 108.35267,245.1177
 5 L 108.35267,245.11775 z M 118.47016,238.12595 C 118.47015,237.75526 118.67834,237.01435 119.09473,235.90321 C 119.54266,234.69929 119.89781,234.09733 120.16018,234.09732 C 120.34527,234.09733 120.69241,234.68387 121.2016,235.85693 C 121.71076,237.03001 121.96534,237.77858 121.96536,238.10263 C 121.96534,238.33438 121.54095,238.45779 120.69219,238.47285 C 121.41755,238.45779 120.90838,238.45025 119.16468,238.45024 C 119.11803,238.45025 119.04067,238.4579 118.93258,238.47321 C 118.82447,238.48852 118.74734,238.49617 118.70119,238.49617 C 118.54716,238.49617 118.47015,238.37277 118.47016,238.12595 L 118.47016,238.12595 z M 133.4422,244.63166 C 133.4422,244.95572 133.60423,245.11775 133.92829,245.11775 C 136.19719,245.11775 138.39661,245.07136 140.52656,244.97856 L 140.89678,244.67758 L 140.89678,238.61204 C 140.89677,238.30353 141.18998,238.11842 141.77641,238.05671 C 141.91535,238.04117 142.31666,238.0334 142.98033,238.03339 C 143.62845,238.0334 144.59309,238.05648 145.87427
 ,238.10263 C 147.15543,238.1488 148.12008,238.17188 148.76821,238.17187 C 148.87606,238.23358 148.97639,238.3803 149.0692,238.61204 C 149.05364,239.13676 149.04586,239.6537 149.04588,240.16287 C 149.04586,241.72194 149.10003,243.28101 149.20838,244.84009 C 149.33177,245.0252 149.47826,245.11775 149.64784,245.11775 L 155.50567,245.11775 C 156.21547,245.11775 156.57815,244.83208 156.59372,244.26073 C 156.5937,243.79772 156.60901,242.95672 156.63965,241.73772 C 156.63962,241.30533 156.65517,240.34822 156.68628,238.86639 C 156.71687,237.47736 156.73218,236.52049 156.7322,235.99576 L 156.7322,227.33726 C 156.73218,227.18278 156.68979,226.99756 156.60503,226.78157 C 156.52022,226.56563 156.41612,226.45765 156.29274,226.45763 L 149.64784,226.45763 C 149.30822,226.45765 149.13842,226.69693 149.13843,227.17546 L 149.13843,232.82416 C 148.93777,232.96312 148.76796,233.06345 148.62903,233.12514 C 148.1043,233.12515 147.30945,233.14835 146.24449,233.19473 C 145.17951,233.24114 144.38467
 ,233.26434 143.85996,233.26433 C 143.47419,233.26434 143.03449,233.22571 142.54087,233.14845 C 141.81502,233.04013 141.38263,232.97843 141.24369,232.96334 L 141.03597,232.68497 L 141.03597,226.75861 C 141.02042,226.72754 140.93928,226.66961 140.79257,226.58481 C 140.64584,226.50004 140.55717,226.45765 140.52656,226.45763 L 134.46031,226.45763 C 134.02838,226.45765 133.78157,226.55798 133.71987,226.75861 C 133.64262,227.02099 133.58869,227.66157 133.55807,228.68037 C 133.54253,229.2668 133.54253,230.22368 133.55807,231.55099 L 133.58068,233.54199 C 133.58068,233.80436 133.5576,234.19024 133.51144,234.69964 C 133.46528,235.20905 133.4422,235.59494 133.4422,235.85728 L 133.4422,237.29224 L 133.41889,238.72791 L 133.4422,244.63166 z M 157.18156,236.3893 C 157.18155,237.79389 157.56732,239.20623 158.33885,240.62635 C 159.52723,242.818 161.44875,244.25343 164.10341,244.93264 C 165.66248,245.33395 168.14757,245.5346 171.5587,245.5346 C 176.5284,245.5346 179.89288,243.79819 181.6521
 6,240.32537 C 182.40859,238.82801 182.78682,237.24586 182.78685,235.57891 C 182.78682,229.09677 178.31097,225.85569 169.35928,225.85567 C 165.63963,225.85569 162.73792,226.6583 160.65414,228.26352 C 158.33908,230.06942 157.18155,232.77801 157.18156,236.3893 L 157.18156,236.3893 z M 165.28402,236.27343 C 165.28401,234.74545 165.65447,233.47983 166.39539,232.47655 C 167.22908,231.33481 168.37908,230.76394 169.84537,230.76392 C 170.10771,230.76394 170.23889,230.76394 170.2389,230.76392 C 170.59357,230.77901 170.72475,230.78654 170.63244,230.78653 C 170.81754,230.91042 171.07989,231.01852 171.41951,231.11083 C 171.92867,231.24979 172.21411,231.32704 172.27583,231.34257 C 173.07843,231.6511 173.69593,232.27614 174.12834,233.21769 C 174.49855,234.00478 174.68366,234.88464 174.68367,235.85728 C 174.68366,237.06075 174.27481,238.14103 173.45714,239.09813 C 172.60788,240.11695 171.59684,240.62635 170.42402,240.62635 C 169.00388,240.62635 167.8155,240.26367 166.85887,239.5383 C 165.80
 896,238.75123 165.28401,237.66294 165.28402,236.27343 L 165.28402,236.27343 z M 194.75544,245.67308 C 201.17588,245.67308 204.82603,243.74379 205.70592,239.8852 C 205.72144,239.8235 205.76006,239.31433 205.82179,238.35769 C 205.86793,237.63186 205.97579,237.17638 206.14538,236.99126 L 206.14538,229.74511 C 206.14535,228.00094 206.04503,226.94374 205.8444,226.5735 C 204.67154,226.46519 203.59126,226.41102 202.60355,226.411 C 201.78537,226.41102 200.59699,226.46519 199.03841,226.5735 L 198.76004,226.85117 L 198.6908,231.94453 C 198.69078,232.68546 198.69078,233.78905 198.6908,235.25532 C 198.67524,237.46205 198.33563,238.95518 197.67198,239.73471 C 197.0083,240.51425 196.04366,240.90402 194.77804,240.90402 C 193.71306,240.90402 192.81035,240.64166 192.06992,240.11694 C 191.22067,239.53053 190.79604,238.72792 190.79605,237.70909 L 190.79605,227.01296 C 190.79604,226.62722 190.60316,226.43433 190.2174,226.43432 L 183.98936,226.43432 C 183.88149,226.43433 183.68861,226.57329 183.
 41071,226.85117 C 183.41071,227.97786 183.38763,229.66788 183.34147,231.92121 C 183.29531,234.17458 183.27223,235.86459 183.27223,236.99126 C 183.27223,239.46082 183.84711,241.3708 184.99687,242.72121 C 186.14662,244.07162 187.93319,244.90909 190.35659,245.23362 C 192.57885,245.52659 194.04513,245.67308 194.75544,245.67308 L 194.75544,245.67308 z M 206.60462,226.89709 L 206.60462,230.94903 C 206.60462,231.27311 207.50756,231.43514 209.31345,231.43512 C 209.77646,231.43514 210.4632,231.42737 211.37369,231.41181 L 213.24881,231.38849 C 213.54225,231.38851 213.68897,231.67418 213.68898,232.24551 L 213.68898,244.67758 C 213.68897,244.97103 213.83546,245.11775 214.12844,245.11775 L 220.77334,245.11775 L 220.91182,244.84009 C 220.95843,242.1388 221.0126,239.4839 221.07432,236.87539 L 221.2128,231.66686 C 221.38282,231.4813 221.52954,231.35765 221.65297,231.29594 L 228.15868,231.29594 L 228.15868,226.89709 L 227.8577,226.59611 C 225.34197,226.50381 223.63641,226.45765 222.74102,226
 .45763 L 209.75291,226.45763 C 209.50609,226.45765 208.54922,226.50381 206.88229,226.59611 L 206.60462,226.89709 z "
+       id="text9138" />
+  </g>
+</svg>

Added: mahout/site/new_website/MAHOUT/faq.data/mahout_fast_feather.pdf
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/faq.data/mahout_fast_feather.pdf?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/faq.data/mahout_fast_feather.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



Mime
View raw message