mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1243022 [10/38] - in /mahout/site/new_website: ./ MAHOUT/ MAHOUT/2010/ MAHOUT/2010/09/ MAHOUT/2010/09/14/ MAHOUT/2011/ MAHOUT/2011/10/ MAHOUT/2011/10/21/ MAHOUT/books-tutorials-and-talks.data/ MAHOUT/books-tutorials-talks.data/ MAHOUT/book...
Date Sat, 11 Feb 2012 10:22:31 GMT
Added: mahout/site/new_website/MAHOUT/faq.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/faq.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/faq.html (added)
+++ mahout/site/new_website/MAHOUT/faq.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,180 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() { /* Invoked from the BODY onload attribute: prepares the search form and collapses the optional 'children' element. */
+        /* Search form initialization: copy this page's hostname into both hidden Google search fields. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the toggle elements by id and start with 'children' hidden. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* uses document.all when the browser reports it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): show/hide are not null-checked; assumes they exist whenever 'children' does - confirm */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Displays 'children' as a block, hides the 'show' toggle and reveals the 'hide' toggle; uses the globals set in init() without null checks. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Hides 'children', reveals the 'show' toggle and hides the 'hide' toggle; uses the globals set in init() without null checks. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>FAQ</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="developer-resources.html" title="Developer Resources">Developer Resources</A>&nbsp;&gt;&nbsp;<A href="" title="FAQ">FAQ</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">FAQ</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=74837">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=74837">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=74837">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=74837">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=74837">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=74837">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>The Official Mahout FAQ</P>
+
+<P><B>General</B></P>
+
+<OL>
+	<LI><A href="#FAQ-whatIs">What is Apache Mahout?</A></LI>
+	<LI><A href="#FAQ-mean">What does the name mean?</A></LI>
+	<LI><A href="#FAQ-historical">Where can I find the origins of the Mahout project? </A></LI>
+	<LI><A href="#FAQ-logo">Where can I download Mahout logo? </A></LI>
+	<LI><A href="#FAQ-presentations">Where can I download Mahout slide presentations? </A></LI>
+</OL>
+
+
+<P><B>Algorithms</B></P>
+<OL>
+	<LI><A href="#FAQ-algos">What algorithms are implemented in Mahout?</A></LI>
+	<LI><A href="#FAQ-todo">What algorithms are missing from Mahout?</A></LI>
+</OL>
+
+
+
+<H1><A name="FAQ-Answers"></A><B>Answers</B></H1>
+<H2><A name="FAQ-General"></A>General</H2>
+<H3><A name="FAQ-WhatisApacheMahout%3F"></A><A name="FAQ-whatIs"></A>What is Apache Mahout?</H3>
+<P>Apache Mahout is a suite of machine learning libraries designed to be scalable and robust</P>
+
+<H3><A name="FAQ-Whatdoesthenamemean%3F"></A><A name="FAQ-mean"></A>What does the name mean?</H3>
+<P>The name <A href="http://en.wikipedia.org/wiki/Mahout" class="external-link" rel="nofollow">Mahout</A> was originally chosen for its association with the <A href="http://hadoop.apache.org/" class="external-link" rel="nofollow">Apache Hadoop</A> project.  A Mahout is a person who drives an elephant (hint: Hadoop's logo is an elephant.)  We just wanted a name that complemented Hadoop but we see our project as a good driver of Hadoop in the sense that we will be using and testing it.  We are not, however, implying that we are controlling Hadoop's development.</P>
+
+<H3><A name="FAQ-WherecanIfindtheoriginsoftheMahoutproject%3F"></A><A name="FAQ-historical"></A>Where can I find the origins of the Mahout project?</H3>
+
+<P>See <A href="http://ml-site.grantingersoll.com/" class="external-link" rel="nofollow">http://ml-site.grantingersoll.com</A> for old wiki and mailing list archives (all read-only)</P>
+
+<H3><A name="FAQ-WherecanIdownloadMahoutlogo%3F"></A><A name="FAQ-logo"></A>Where can I download Mahout logo?</H3>
+<P>See <A href="https://issues.apache.org/jira/browse/MAHOUT-335" class="external-link" rel="nofollow">MAHOUT-335</A></P>
+
+<H3><A name="FAQ-WherecanIdownloadMahoutslidepresentations%3F"></A><A name="FAQ-presentations"></A> Where can I download Mahout slide presentations?</H3>
+<UL>
+	<LI><A href="http://cwiki.apache.org/MAHOUT/bookstutorialstalks.html" class="external-link" rel="nofollow">Books, Tutorials and Talks </A> Wiki page containing an overview of all presentations with links to slides where available.</LI>
+</UL>
+
+
+<H2><A name="FAQ-Algorithms"></A>Algorithms</H2>
+
+<H3><A name="FAQ-WhatalgorithmsareimplementedinMahout%3F"></A><A name="FAQ-algos"></A> What algorithms are implemented in Mahout?</H3>
+<P>We are interested in a wide variety of machine learning algorithms, many of which are already implemented in Mahout. You can find them <A href="http://cwiki.apache.org/confluence/display/MAHOUT/Algorithms" class="external-link" rel="nofollow">here </A>.  </P>
+
+<H3><A name="FAQ-WhatalgorithmsaremissingfromMahout%3F"></A><A name="FAQ-todo"></A> What algorithms are missing from Mahout?</H3>
+<P>There are many machine learning algorithms that we would like to have in Mahout, including some from the paper <A href="http://www.cs.stanford.edu/people/ang/papers/nips06-mapreducemulticore.pdf" class="external-link" rel="nofollow">http://www.cs.stanford.edu/people/ang/papers/nips06-mapreducemulticore.pdf</A>. If you have an algorithm or an improvement to an algorithm that you would like to implement, by all means submit a patch.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* reuse an already-defined command queue, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { /* creates an async script element for ga.js and inserts it before the document's first script element */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* host prefix follows the page protocol */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/file-format-integrations.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/file-format-integrations.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/file-format-integrations.html (added)
+++ mahout/site/new_website/MAHOUT/file-format-integrations.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,230 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() { /* Invoked from the BODY onload attribute: prepares the search form and collapses the optional 'children' element. */
+        /* Search form initialization: copy this page's hostname into both hidden Google search fields. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the toggle elements by id and start with 'children' hidden. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* uses document.all when the browser reports it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): show/hide are not null-checked; assumes they exist whenever 'children' does - confirm */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Displays 'children' as a block, hides the 'show' toggle and reveals the 'hide' toggle; uses the globals set in init() without null checks. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Hides 'children', reveals the 'show' toggle and hides the 'hide' toggle; uses the globals set in init() without null checks. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>File Format Integrations</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="File Format Integrations">File Format Integrations</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">File Format Integrations</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27830941">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27830941">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27830941">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27830941">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27830941">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27830941">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>There are several importers and exporters for common file formats.</P>
+<DIV>
+<UL>
+    <LI><A href="#FileFormatIntegrations-Generalpurposeconvertors">General-purpose convertors</A></LI>
+<UL>
+    <LI><A href="#FileFormatIntegrations-Importer%2527bin%252Fmahout%2527jobs">Importer 'bin/mahout' jobs</A></LI>
+    <LI><A href="#FileFormatIntegrations-Exporter%2527bin%252Fmahout%2527jobs">Exporter 'bin/mahout' jobs</A></LI>
+    <LI><A href="#FileFormatIntegrations-Importerclasses">Importer classes</A></LI>
+    <LI><A href="#FileFormatIntegrations-Exporterclasses">Exporter classes</A></LI>
+</UL>
+    <LI><A href="#FileFormatIntegrations-Examples">Examples</A></LI>
+<UL>
+    <LI><A href="#FileFormatIntegrations-RegexConverter">Regex Converter</A></LI>
+</UL>
+</UL></DIV>
+<H2><A name="FileFormatIntegrations-Generalpurposeconvertors"></A>General-purpose convertors</H2>
+<H3><A name="FileFormatIntegrations-Importer%27bin%2Fmahout%27jobs"></A>Importer 'bin/mahout' jobs</H3>
+<P>Run these with --help to see options</P>
+<UL>
+	<LI>bin/mahout arff.vector</LI>
+	<LI>bin/mahout lucene.vector</LI>
+	<LI>bin/mahout seqdirectory
+	<UL>
+		<LI>turns text files into sequence files, one file per key/value pair</LI>
+	</UL>
+	</LI>
+	<LI>bin/mahout SequenceFilesFromMailArchives
+	<UL>
+		<LI>parses mailboxes and emits one text body per mail message</LI>
+	</UL>
+	</LI>
+	<LI>bin/mahout regexconverter
+	<UL>
+		<LI>reads text lines and emits the regex output lines into SequenceFiles.</LI>
+	</UL>
+	</LI>
+</UL>
+
+
+<H3><A name="FileFormatIntegrations-Exporter%27bin%2Fmahout%27jobs"></A>Exporter 'bin/mahout' jobs</H3>
+<P>Some programs exist to dump text versions of SequenceFiles for perusal. Run these with --help to see options.</P>
+<UL>
+	<LI>bin/mahout clusterdump</LI>
+	<LI>bin/mahout cmdump</LI>
+	<LI>bin/mahout matrixdump</LI>
+	<LI>bin/mahout seqdumper</LI>
+	<LI>bin/mahout vectordump</LI>
+</UL>
+
+
+<P><B>Note:</B> all classes with a 'main' method can be used as a bin/mahout job name.</P>
+
+<H3><A name="FileFormatIntegrations-Importerclasses"></A>Importer classes</H3>
+
+<P>These are not main() classes and must be coded against.</P>
+<UL>
+	<LI>CSVVectorIterator imports CSV files into vectors.</LI>
+</UL>
+
+
+<H3><A name="FileFormatIntegrations-Exporterclasses"></A>Exporter classes</H3>
+
+<UL>
+	<LI><B>GraphMLClusterWriter</B> saves cluster data in the <A href="http://graphml.graphdrawing.org/" class="external-link" rel="nofollow">GraphML</A> format.</LI>
+	<LI><B>CSVClusterWriter</B> saves clusters in a csv-based format.</LI>
+</UL>
+
+
+<P>Both of these formats are read by the <A href="http://gephi.org/" class="external-link" rel="nofollow">Gephi</A> program, an interactive graph explorer. </P>
+
+<P>There are many file importers which are custom-made for particular algorithms:</P>
+<UL>
+	<LI>The various text -&gt; Lucene index converters</LI>
+</UL>
+
+
+<H2><A name="FileFormatIntegrations-Examples"></A>Examples</H2>
+<H3><A name="FileFormatIntegrations-RegexConverter"></A>Regex Converter</H3>
+<P>For example, the following will extract queries from HTTP request logs to <A href="http://lucene.apache.org/" class="external-link" rel="nofollow">Solr</A> and prepare them for use by Frequent Itemset Mining.</P>
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+bin/mahout regexconverter --input /Users/grantingersoll/projects/content/lucid/lucidfind/logs --output /tmp/solr/output --regex <SPAN class="code-quote">&quot;(?&lt;=(\?|&amp;)q=).*?(?=&amp;|$)&quot;</SPAN> --overwrite --transformerClass url --formatterClass fpg
+</PRE>
+</DIV></DIV>
+<P>See <A href="http://download.oracle.com/javase/tutorial/essential/regex/" class="external-link" rel="nofollow">tutorial</A> and <A href="http://www.omicentral.com/cheatsheets/JavaRegularExpressionsCheatSheet.pdf" class="external-link" rel="nofollow">cheat sheet</A> for this marvelously opaque toolkit.</P>
+
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* reuse an already-defined command queue, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { /* creates an async script element for ga.js and inserts it before the document's first script element */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* host prefix follows the page protocol */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means-commandline.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means-commandline.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/fuzzy-k-means-commandline.html (added)
+++ mahout/site/new_website/MAHOUT/fuzzy-k-means-commandline.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,236 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() { /* Invoked from the BODY onload attribute: prepares the search form and collapses the optional 'children' element. */
+        /* Search form initialization: copy this page's hostname into both hidden Google search fields. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the toggle elements by id and start with 'children' hidden. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* uses document.all when the browser reports it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): show/hide are not null-checked; assumes they exist whenever 'children' does - confirm */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Displays 'children' as a block, hides the 'show' toggle and reveals the 'hide' toggle; uses the globals set in init() without null checks. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Hides 'children', reveals the 'show' toggle and hides the 'hide' toggle; uses the globals set in init() without null checks. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>fuzzy-k-means-commandline</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="quickstart.html" title="Quickstart">Quickstart</A>&nbsp;&gt;&nbsp;<A href="clusteringyourdata.html" title="ClusteringYourData">ClusteringYourData</A>&nbsp;&gt;&nbsp;<A href="" title="fuzzy-k-means-commandline">fuzzy-k-means-commandline</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">fuzzy-k-means-commandline</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=3474174">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=3474174">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=3474174">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=3474174">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=3474174">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=3474174">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="fuzzy-k-means-commandline-RunningFuzzykMeansClusteringfromtheCommandLine"></A>Running Fuzzy k-Means Clustering from the Command Line</H1>
+<P>Mahout's Fuzzy k-Means clustering can be launched from the same command line invocation whether you are running on a single machine in stand-alone mode or on a larger Hadoop cluster. The difference is determined by the $HADOOP_HOME and $HADOOP_CONF_DIR environment variables. If both are set to an operating Hadoop cluster on the target machine then the invocation will run FuzzyK on that cluster. If either of the environment variables are missing then the stand-alone Hadoop configuration will be invoked instead.</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+./bin/mahout fkmeans &lt;OPTIONS&gt;
+</PRE>
+</DIV></DIV>
+
+<UL>
+	<LI>In $MAHOUT_HOME/, build the jar containing the job (mvn install). The job will be generated in $MAHOUT_HOME/core/target/ and its name will contain the Mahout version number. For example, when using the Mahout 0.3 release, the job will be mahout-core-0.3.job</LI>
+</UL>
+
+
+
+<H2><A name="fuzzy-k-means-commandline-Testingitononesinglemachinew%2Focluster"></A>Testing it on one single machine w/o cluster</H2>
+
+<UL>
+	<LI>Put the data: cp &lt;PATH TO DATA&gt; testdata</LI>
+	<LI>Run the Job:
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+./bin/mahout fkmeans -i testdata &lt;OPTIONS&gt;
+</PRE>
+</DIV></DIV></LI>
+</UL>
+
+
+<H2><A name="fuzzy-k-means-commandline-Runningitonthecluster"></A>Running it on the cluster</H2>
+
+<UL>
+	<LI>(As needed) Start up Hadoop: $HADOOP_HOME/bin/start-all.sh</LI>
+	<LI>Put the data: $HADOOP_HOME/bin/hadoop fs -put &lt;PATH TO DATA&gt; testdata</LI>
+	<LI>Run the Job:
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+export HADOOP_HOME=&lt;Hadoop Home Directory&gt;
+export HADOOP_CONF_DIR=$HADOOP_HOME/conf
+./bin/mahout fkmeans -i testdata &lt;OPTIONS&gt;
+</PRE>
+</DIV></DIV></LI>
+	<LI>Get the data out of HDFS and have a look. Use bin/hadoop fs -lsr output to view all outputs.</LI>
+</UL>
+
+
+<H1><A name="fuzzy-k-means-commandline-Commandlineoptions"></A>Command line options</H1>
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+  --input (-i) input                           Path to job input directory.     
+                                               Must be a SequenceFile of        
+                                               VectorWritable                   
+  --clusters (-c) clusters                     The input centroids, as Vectors. 
+                                               Must be a SequenceFile of        
+                                               Writable, Cluster/Canopy.  If k  
+                                               is also specified, then a random 
+                                               set of vectors will be selected  
+                                               and written out to <SPAN class="code-keyword">this</SPAN> path     
+                                               first                            
+  --output (-o) output                         The directory pathname <SPAN class="code-keyword">for</SPAN>       
+                                               output.                          
+  --distanceMeasure (-dm) distanceMeasure      The classname of the             
+                                               DistanceMeasure. Default is      
+                                               SquaredEuclidean                 
+  --convergenceDelta (-cd) convergenceDelta    The convergence delta value.     
+                                               Default is 0.5                   
+  --maxIter (-x) maxIter                       The maximum number of            
+                                               iterations.                      
+  --k (-k) k                                   The k in k-Means.  If specified, 
+                                               then a random selection of k     
+                                               Vectors will be chosen as the    
+                                               Centroid and written to the      
+                                               clusters input path.             
+  --m (-m) m                                   coefficient normalization        
+                                               factor, must be greater than 1   
+  --overwrite (-ow)                            If present, overwrite the output 
+                                               directory before running job     
+  --help (-h)                                  Print out help                   
+  --numMap (-u) numMap                         The number of map tasks.         
+                                               Defaults to 10                   
+  --maxRed (-r) maxRed                         The number of reduce tasks.      
+                                               Defaults to 2                    
+  --emitMostLikely (-e) emitMostLikely         True <SPAN class="code-keyword">if</SPAN> clustering should emit   
+                                               the most likely point only,      
+                                               <SPAN class="code-keyword">false</SPAN> <SPAN class="code-keyword">for</SPAN> threshold clustering.  
+                                               Default is <SPAN class="code-keyword">true</SPAN>                  
+  --threshold (-t) threshold                   The pdf threshold used <SPAN class="code-keyword">for</SPAN>       
+                                               cluster determination. Default   
+                                               is 0 
+  --clustering (-cl)                           If present, run clustering after 
+                                               the iterations have taken place  
+                            
+</PRE>
+</DIV></DIV>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];  // reuse an already-defined _gaq if there is one, otherwise start with an empty array
+  _gaq.push(['_setAccount', 'UA-17359171-1']);  // each push queues a [command, argument] entry (presumably consumed by ga.js once loaded -- confirm)
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {  // immediately-invoked loader: builds a script element for ga.js and adds it to the document
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);  // insert before the first script element in the document
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/2dFuzzyKMeans.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.data/2dFuzzyKMeans.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/2dFuzzyKMeans.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/2dFuzzyKMeans.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.data/2dFuzzyKMeans.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/2dFuzzyKMeans.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/FuzzyKMeans.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.data/FuzzyKMeans.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/FuzzyKMeans.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/FuzzyKMeans.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.data/FuzzyKMeans.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/FuzzyKMeans.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/SampleData.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.data/SampleData.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/SampleData.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/SampleData.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.data/SampleData.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/fuzzy-k-means.data/SampleData.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/fuzzy-k-means.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/fuzzy-k-means.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/fuzzy-k-means.html (added)
+++ mahout/site/new_website/MAHOUT/fuzzy-k-means.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,257 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;      // element with id 'hide'; assigned in init()
+      var show = null;      // element with id 'show'; assigned in init()
+      var children = null;  // the 'children' element; assigned in init()
+
+      function init() {  // Page set-up hook: wired to the onload attribute of this page's BODY tag.
+        /* Search form initialization: copy this host's name into the form's 'domains' and 'sitesearch' fields (presumably to scope the Google search to this site -- confirm) */
+        var form = document.forms['search'];
+        if (form != null) {  // nothing to fill in when no form named 'search' is present
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the 'hide', 'show' and 'children' elements in the page-level variables */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?  // use document.all when the browser exposes it, otherwise getElementById
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {  // start collapsed; NOTE(review): 'show' and 'hide' are used here without their own null checks
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {  // Expand: display the 'children' element and swap which toggle control is visible. Relies on the variables set by init(); no null checks.
+        children.style.display = 'block';
+        show.style.display = 'none';    // the 'show' control disappears...
+        hide.style.display = 'inline';  // ...and the 'hide' control takes its place
+      }
+
+      function hideChildren() {  // Collapse: the inverse of showChildren(). Relies on the variables set by init(); no null checks.
+        children.style.display = 'none';
+        show.style.display = 'inline';  // the 'show' control becomes visible again...
+        hide.style.display = 'none';    // ...and the 'hide' control is hidden
+      }
+    </SCRIPT>
+    <TITLE>Fuzzy K-Means</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Fuzzy K-Means">Fuzzy K-Means</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Fuzzy K-Means</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=95315">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=95315">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=95315">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=95315">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=95315">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=95315">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Fuzzy K-Means (also called Fuzzy C-Means) is an extension of <A href="http://cwiki.apache.org/MAHOUT/k-means.html" class="external-link" rel="nofollow">K-Means</A>, the popular simple clustering technique. While K-Means discovers hard clusters (a point belongs to only one cluster), Fuzzy K-Means is a more statistically formalized method and discovers soft clusters where a particular point can belong to more than one cluster with a certain probability.</P>
+
+<H4><A name="FuzzyK-Means-Algorithm"></A>Algorithm</H4>
+
+<P>Like K-Means, Fuzzy K-Means works on objects which can be represented in an n-dimensional vector space and for which a distance measure is defined.<BR>
+The algorithm is similar to k-means.</P>
+<UL>
+	<LI>Initialize k clusters</LI>
+	<LI>Until converged
+	<UL>
+		<LI>Compute the probability of a point belonging to a cluster for every &lt;point,cluster&gt; pair</LI>
+		<LI>Recompute the cluster centers using above probability membership values of points to clusters</LI>
+	</UL>
+	</LI>
+</UL>
+
+
+<H4><A name="FuzzyK-Means-DesignImplementation"></A>Design Implementation</H4>
+
+<P>The design is similar to the K-Means implementation present in Mahout. It accepts an input file containing vector points. The user can either provide the cluster centers as input or allow the canopy algorithm to run and create the initial clusters.</P>
+
+<P>Similar to K-Means, the program doesn't modify the input directories. For every iteration, the cluster output is stored in a directory cluster-N. The code sets the number of reduce tasks equal to the number of map tasks,
+so that many part-0&#42; files
+are created in the cluster-N directory. The code uses driver/mapper/combiner/reducer as follows:</P>
+
+<P>FuzzyKMeansDriver - This is similar to&nbsp; KMeansDriver. It iterates over input points and cluster points for the specified number of iterations or until it has converged. During every iteration i, a new cluster-i directory is created which contains the modified cluster centers obtained during that FuzzyKMeans iteration. These are fed as input clusters to the next iteration.&nbsp; Once Fuzzy KMeans has run for the specified number of iterations or has converged, a map task is run to output &quot;the point and the cluster membership to each cluster&quot; pair as final output to a directory named &quot;points&quot;.</P>
+
+<P>FuzzyKMeansMapper - reads the input clusters during its configure() method, then&nbsp; computes the cluster membership probability of a point to each cluster. Cluster membership is inversely proportional to the distance. Distance is computed using&nbsp; the user-supplied distance measure. Output key is the encoded clusterId. Output values are ClusterObservations containing observation statistics.</P>
+
+<P>FuzzyKMeansCombiner - receives all key:value pairs from the mapper and produces partial sums of the cluster membership probability times input vectors for each cluster. Output key is: encoded cluster identifier. Output values are ClusterObservations containing observation statistics.</P>
+
+<P>FuzzyKMeansReducer - Multiple reducers each receive certain keys and all values associated with those keys. The reducer sums the values to produce a new centroid for the cluster which is output. Output key is: encoded cluster identifier (e.g. &quot;C14&quot;). Output value is: formatted cluster identifier (e.g. &quot;C14&quot;). The reducer encodes unconverged clusters with a 'Cn' cluster Id and converged clusters with 'Vn' clusterId.</P>
+
+<H2><A name="FuzzyK-Means-RunningFuzzykMeansClustering"></A>Running Fuzzy k-Means Clustering</H2>
+
+<P>The Fuzzy k-Means clustering algorithm may be run using a command-line invocation on FuzzyKMeansDriver.main or by making a Java call to FuzzyKMeansDriver.run(). </P>
+
+<P>Invocation using the command line takes the form:</P>
+
+<DIV class="preformatted panel" style="border-width: 1px;"><DIV class="preformattedContent panelContent">
+<PRE>bin/mahout fkmeans \
+    -i &lt;input vectors directory&gt; \
+    -c &lt;input clusters directory&gt; \
+    -o &lt;output working directory&gt; \
+    -dm &lt;DistanceMeasure&gt; \
+    -m &lt;fuzziness argument &gt;1&gt; \
+    -x &lt;maximum number of iterations&gt; \
+    -k &lt;optional number of initial clusters to sample from input vectors&gt; \
+    -cd &lt;optional convergence delta. Default is 0.5&gt; \
+    -ow &lt;overwrite output directory if present&gt; \
+    -cl &lt;run input vector clustering after computing Clusters&gt; \
+    -e &lt;emit vectors to most likely cluster during clustering&gt; \
+    -t &lt;threshold to use for clustering if -e is false&gt; \
+    -xm &lt;execution method: sequential or mapreduce&gt;
+</PRE>
+</DIV></DIV>
+
+<P><B>Note:</B> if the -k argument is supplied, any clusters in the -c directory will be overwritten and -k random points will be sampled from the input vectors to become the initial cluster centers.</P>
+
+<P>Invocation using Java involves supplying the following arguments:</P>
+
+<OL>
+	<LI>input: a file path string to a directory containing the input data set, a SequenceFile(WritableComparable, VectorWritable). The sequence file <EM>key</EM> is not used.</LI>
+	<LI>clustersIn: a file path string to a directory containing the initial clusters, a SequenceFile(key, SoftCluster | Cluster | Canopy). Fuzzy k-Means SoftClusters, k-Means Clusters and Canopy Canopies may be used for the initial clusters.</LI>
+	<LI>output: a file path string to an empty directory which is used for all output from the algorithm.</LI>
+	<LI>measure: the fully-qualified class name of an instance of DistanceMeasure which will be used for the clustering.</LI>
+	<LI>convergence: a double value used to determine if the algorithm has converged (clusters have not moved more than the value in the last iteration)</LI>
+	<LI>max-iterations: the maximum number of iterations to run, independent of the convergence specified</LI>
+	<LI>m: the &quot;fuzziness&quot; argument, a double &gt; 1. For m equal to 2, this is equivalent to normalising the coefficients linearly to make their sum 1. When m is close to 1, then the cluster center closest to the point is given much more weight than the others, and the algorithm is similar to k-means.</LI>
+	<LI>runClustering: a boolean indicating, if true, that the clustering step is to be executed after clusters have been determined.</LI>
+	<LI>emitMostLikely: a boolean indicating, if true, that the clustering step should only emit the most likely cluster for each clustered point.</LI>
+	<LI>threshold: a double indicating, if emitMostLikely is false, the cluster probability threshold used for emitting multiple clusters for each point. A value of 0 will emit all clusters with their associated probabilities for each vector.</LI>
+	<LI>runSequential: a boolean indicating, if true, that the algorithm is to use the sequential reference implementation running in memory.</LI>
+</OL>
+
+
+<P>After running the algorithm, the output directory will contain:</P>
+<OL>
+	<LI>clusters-N: directories containing SequenceFiles(Text, SoftCluster) produced by the algorithm for each iteration. The Text <EM>key</EM> is a cluster identifier string.</LI>
+	<LI>clusteredPoints: (if runClustering enabled) a directory containing SequenceFile(IntWritable, WeightedVectorWritable). The IntWritable <EM>key</EM> is the clusterId. The WeightedVectorWritable <EM>value</EM> is a bean containing a double <EM>weight</EM> and a VectorWritable <EM>vector</EM> where the weight indicates the probability that the vector is a member of the cluster.</LI>
+</OL>
+
+
+<H1><A name="FuzzyK-Means-Examples"></A>Examples</H1>
+
+<P>The following images illustrate Fuzzy k-Means clustering applied to a set of randomly-generated 2-d data points. The points are generated using a normal distribution centered at a mean location and with a constant standard deviation. See the README file in the <A href="http://svn.apache.org/repos/asf/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt" class="external-link" rel="nofollow">/examples/src/main/java/org/apache/mahout/clustering/display/README.txt</A> for details on running similar examples.</P>
+
+<P>The points are generated as follows:</P>
+
+<UL>
+	<LI>500 samples m=[1.0, 1.0] sd=3.0</LI>
+	<LI>300 samples m=[1.0, 0.0] sd=0.5</LI>
+	<LI>300 samples m=[0.0, 2.0] sd=0.1</LI>
+</UL>
+
+
+<P>In the first image, the points are plotted and the 3-sigma boundaries of their generator are superimposed. </P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="fuzzy-k-means.data/SampleData.png" style="border: 0px solid black"></SPAN></P>
+
+<P>In the second image, the resulting clusters (k=3) are shown superimposed upon the sample data. As Fuzzy k-Means is an iterative algorithm, the centers of the clusters in each recent iteration are shown using different colors. Bold red is the final clustering and previous iterations are shown in [orange, yellow, green, blue, violet and gray]. Although it misses a lot of the points and cannot capture the original, superimposed cluster centers, it does a decent job of clustering this data.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="fuzzy-k-means.data/FuzzyKMeans.png" style="border: 0px solid black"></SPAN></P>
+
+<P>The third image shows the results of running Fuzzy k-Means on a different data set (see <A href="dirichlet-process-clustering.html" title="Dirichlet Process Clustering">Dirichlet Process Clustering</A> for details) which is generated using asymmetrical standard deviations. Fuzzy k-Means does a fair job handling this data set as well.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="fuzzy-k-means.data/2dFuzzyKMeans.png" style="border: 0px solid black"></SPAN></P>
+
+<H4><A name="FuzzyK-Means-References%26nbsp%3B"></A>References&nbsp;</H4>
+
+<UL>
+	<LI><A href="http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering" class="external-link" rel="nofollow">http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering</A></LI>
+</UL>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];  // reuse an already-defined _gaq if there is one, otherwise start with an empty array
+  _gaq.push(['_setAccount', 'UA-17359171-1']);  // each push queues a [command, argument] entry (presumably consumed by ga.js once loaded -- confirm)
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {  // immediately-invoked loader: builds a script element for ga.js and adds it to the document
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);  // insert before the first script element in the document
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/gaussian-discriminative-analysis.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/gaussian-discriminative-analysis.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/gaussian-discriminative-analysis.html (added)
+++ mahout/site/new_website/MAHOUT/gaussian-discriminative-analysis.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,138 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;      // element with id 'hide'; assigned in init()
+      var show = null;      // element with id 'show'; assigned in init()
+      var children = null;  // the 'children' element; assigned in init()
+
+      function init() {  // Page set-up hook: wired to the onload attribute of this page's BODY tag.
+        /* Search form initialization: copy this host's name into the form's 'domains' and 'sitesearch' fields (presumably to scope the Google search to this site -- confirm) */
+        var form = document.forms['search'];
+        if (form != null) {  // nothing to fill in when no form named 'search' is present
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the 'hide', 'show' and 'children' elements in the page-level variables */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?  // use document.all when the browser exposes it, otherwise getElementById
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {  // start collapsed; NOTE(review): 'show' and 'hide' are used here without their own null checks
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {  // Expand: display the 'children' element and swap which toggle control is visible. Relies on the variables set by init(); no null checks.
+        children.style.display = 'block';
+        show.style.display = 'none';    // the 'show' control disappears...
+        hide.style.display = 'inline';  // ...and the 'hide' control takes its place
+      }
+
+      function hideChildren() {  // Collapse: the inverse of showChildren(). Relies on the variables set by init(); no null checks.
+        children.style.display = 'none';
+        show.style.display = 'inline';  // the 'show' control becomes visible again...
+        hide.style.display = 'none';    // ...and the 'hide' control is hidden
+      }
+    </SCRIPT>
+    <TITLE>Gaussian Discriminative Analysis</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Gaussian Discriminative Analysis">Gaussian Discriminative Analysis</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Gaussian Discriminative Analysis</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75690">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75690">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75690">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75690">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75690">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75690">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="GaussianDiscriminativeAnalysis-GaussianDiscriminativeAnalysis"></A>Gaussian Discriminative Analysis</H1>
+
+<P>Gaussian Discriminative Analysis is a tool for multigroup classification based on extending linear discriminant analysis. The paper on the approach is located at <A href="http://citeseer.ist.psu.edu/4617.html" class="external-link" rel="nofollow">http://citeseer.ist.psu.edu/4617.html</A> (note, for some reason the paper is backwards, in that page 1 is at the end)</P>
+
+<H2><A name="GaussianDiscriminativeAnalysis-Parallelizationstrategy"></A>Parallelization strategy</H2>
+
+<H2><A name="GaussianDiscriminativeAnalysis-Designofpackages"></A>Design of packages</H2>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];  // reuse an already-defined _gaq if there is one, otherwise start with an empty array
+  _gaq.push(['_setAccount', 'UA-17359171-1']);  // each push queues a [command, argument] entry (presumably consumed by ga.js once loaded -- confirm)
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {  // immediately-invoked loader: builds a script element for ga.js and adds it to the document
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);  // insert before the first script element in the document
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/glossary.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/glossary.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/glossary.html (added)
+++ mahout/site/new_website/MAHOUT/glossary.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,180 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide' (wraps the "Hide Children" link); assigned in init() */
+      var show = null; /* element with id 'show' (wraps the "Show Children" link); assigned in init() */
+      var children = null; /* element with id 'children' (the child-page list); assigned in init() */
+
+      function init() { /* BODY onload handler: seeds the search form and collapses the child-page list */
+        /* Search form initialization: copy this host's name into the hidden 'domains' and 'sitesearch' fields */
+        var form = document.forms['search'];
+        if (form != null) { /* skipped when the page has no form named 'search' */
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the two toggle wrappers and the child-list container in the globals */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): assumes 'show' and 'hide' exist whenever 'children' does */
+          children.style.display = 'none'; /* start collapsed: list hidden, "Show Children" visible */
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Expands the child-page list; invoked by the "Show Children" javascript: link */
+        children.style.display = 'block'; /* NOTE(review): uses the globals set by init(); they are null until onload has run */
+        show.style.display = 'none'; /* swap the toggles: hide "Show Children", reveal "Hide Children" */
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Collapses the child-page list; invoked by the "Hide Children" javascript: link */
+        children.style.display = 'none'; /* NOTE(review): uses the globals set by init(); they are null until onload has run */
+        show.style.display = 'inline'; /* swap the toggles: reveal "Show Children", hide "Hide Children" */
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Glossary</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="Glossary">Glossary</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Glossary</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=10846323">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=10846323">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=10846323">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=10846323">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=10846323">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=10846323">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>This is a list of common glossary terms used on both the mailing lists and around the site. Where possible, I have tried to provide a link to more in-depth explanations from the web.</P>
+
+<H4><A href="llr-log-likelihood-ratio.html" title="LLR - Log-likelihood Ratio">LLR - Log-likelihood Ratio</A></H4>
+<P>The likelihood ratio test is used to compare the fit of two models, one of which is nested within the other.</P>
+<H4><A href="mr-map-reduce.html" title="MR - Map Reduce">MR - Map Reduce</A></H4>
+<P>MapReduce is a framework for processing huge datasets on certain kinds of distributable problems using a large number of computers (nodes), collectively referred to as a cluster.</P>
+<H4><A href="pearsoncorrelation.html" title="PearsonCorrelation">PearsonCorrelation</A></H4>
+<P>The Pearson correlation measures the degree to which two series of numbers tend to move together &ndash; values in corresponding positions tend to be high together, or low together. In particular it measures the strength of the linear relationship between the two series, the degree to which one can be estimated as a linear function of the other. It is often used in collaborative filtering as a similarity metric on users or items; users that tend to rate the same items high, or low, have a high Pearson correlation and therefore are &quot;similar&quot;.</P>
+<H4><A href="svd-singular-value-decomposition.html" title="SVD - Singular Value Decomposition">SVD - Singular Value Decomposition</A></H4>
+<P>Singular Value Decomposition is a form of product decomposition of a matrix in which a rectangular matrix A is decomposed into a product U s V' where U and V are orthonormal and s is a diagonal matrix.</P>
+<H4><A href="tf-idf-term-frequency-inverse-document-frequency.html" title="TF-IDF - Term Frequency-Inverse Document Frequency">TF-IDF - Term Frequency-Inverse Document Frequency</A></H4>
+<P>TF-IDF is a weight measure often used in information retrieval and text mining. This weight is a statistical measure used to evaluate how important a word is to a document in a collection or corpus. The importance increases proportionally to the number of times a word appears in the document but is offset by the frequency of the word in the corpus.</P>
+
+        </DIV>
+
+                  <DIV class="tabletitle">
+            Children
+            <SPAN class="smalltext" id="show" style="display: inline;">
+              <A href="javascript:showChildren()">Show Children</A></SPAN>
+            <SPAN class="smalltext" id="hide" style="display: none;">
+              <A href="javascript:hideChildren()">Hide Children</A></SPAN>
+          </DIV>
+          <DIV class="greybox" id="children" style="display: none;">
+                                      <A href="mr-map-reduce.html" title="MR - Map Reduce">MR - Map Reduce</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="tf-idf-term-frequency-inverse-document-frequency.html" title="TF-IDF - Term Frequency-Inverse Document Frequency">TF-IDF - Term Frequency-Inverse Document Frequency</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="pearsoncorrelation.html" title="PearsonCorrelation">PearsonCorrelation</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="svd-singular-value-decomposition.html" title="SVD - Singular Value Decomposition">SVD - Singular Value Decomposition</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="llr-log-likelihood-ratio.html" title="LLR - Log-likelihood Ratio">LLR - Log-likelihood Ratio</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                      </DIV>
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* Google Analytics command queue; reuses an existing _gaq if one is already defined */
+  _gaq.push(['_setAccount', 'UA-17359171-1']); /* presumably the Analytics property id these pages report to */
+  _gaq.push(['_setDomainName', 'none']); /* NOTE(review): 'none' plus _setAllowLinker looks like a cross-domain setup; confirm against the ga.js docs */
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']); /* queues the '_trackPageview' command */
+
+  (function() { /* builds a script element for ga.js and inserts it into the document */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* 'ssl' host on https pages, 'www' host otherwise */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); /* placed before the first script element in the document */
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/gsoc.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/gsoc.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/gsoc.html (added)
+++ mahout/site/new_website/MAHOUT/gsoc.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,165 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* receives document.getElementById('hide') in init() */
+      var show = null; /* receives document.getElementById('show') in init() */
+      var children = null; /* receives the element with id 'children', if any, in init() */
+
+      function init() { /* BODY onload handler: seeds the search form and collapses the child-page list when one exists */
+        /* Search form initialization: copy this host's name into the hidden 'domains' and 'sitesearch' fields */
+        var form = document.forms['search'];
+        if (form != null) { /* skipped when the page has no form named 'search' */
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the two toggle wrappers and the child-list container in the globals */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): this page appears to have no 'children' element, so this branch is presumably skipped here */
+          children.style.display = 'none'; /* start collapsed: list hidden, 'show' toggle visible */
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Expands the child-page list and swaps the show/hide toggles */
+        children.style.display = 'block'; /* NOTE(review): uses the globals set by init(); no markup in this page's body appears to call this */
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Collapses the child-page list and swaps the show/hide toggles */
+        children.style.display = 'none'; /* NOTE(review): uses the globals set by init(); no markup in this page's body appears to call this */
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>GSOC</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="GSOC">GSOC</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">GSOC</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=14814449">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=14814449">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=14814449">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=14814449">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=14814449">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=14814449">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="GSOC-Introduction"></A>Introduction</H1>
+
+<P>Mahout has been mentoring students in Google Summer of Code for as long as the project has existed.  To help students better understand what is expected of them, this page lays out common advice, links and other tips and tricks for successfully creating a GSOC proposal for Mahout.</P>
+
+<P>Be warned, however, that GSOC, particularly at the Apache Software Foundation (ASF), is fairly competitive.  Not only are you competing against others within Mahout, but Mahout is competing with other projects in the ASF.  Therefore, it is very important that proposals be well referenced and well thought out.  Even if you don't get selected, consider sticking around.  Open source is a great career builder and can open up many opportunities for you.</P>
+
+<H1><A name="GSOC-TipsonGoodProposals"></A>Tips on Good Proposals</H1>
+
+<UL>
+	<LI>Interact with the community before proposal time.  This is actually part of how we rate proposals.  Having a good idea is just one part of the process.  You must show you can communicate and work within the community parameters.   You might even consider putting up a patch or two that shows you get how things work.  See <A href="how-to-contribute.html" title="How To Contribute">How To Contribute</A>.</LI>
+	<LI>Since Machine Learning is fairly academic, be sure to cite your sources in your proposal.</LI>
+	<LI>Provide a realistic timeline.  Be sure you indicate what other obligations you have during the summer.  It may seem worthwhile to lie here, but we have failed students mid-term in the past because they did not participate as they said they would.  Failing mid-term means not getting paid.</LI>
+	<LI>Do not mail mentors off list privately unless it is something truly personal (most things are not).  This will likely decrease your chances of being selected, not increase them.</LI>
+	<LI>DO NOT BITE OFF MORE THAN YOU CAN CHEW.  Every year, there are a few students who propose to implement 3-5 machine learning algorithms on Map/Reduce, all in a two-month period.  They NEVER get selected.   Be realistic.  All successful projects to date follow, more or less, the following formula:  Implement algorithm on Map/Reduce.  Write Unit Tests.  Do some bigger scale tests.  Write 1 or 2 examples.  Write Wiki documentation.  That's it.  Trust us, it takes a summer to do these things.</LI>
+	<LI><A href="http://www.lucidimagination.com/search/document/2acd6fd380feec3/thoughts_on_gsoc" class="external-link" rel="nofollow">http://www.lucidimagination.com/search/document/2acd6fd380feec3/thoughts_on_gsoc</A></LI>
+</UL>
+
+
+
+<H1><A name="GSOC-Whattoexpectonceselected"></A>What to expect once selected</H1>
+
+<UL>
+	<LI>Just as in the proposals, almost all interaction should take place on the mailing lists.  Only personal matters related to your whereabouts or your evaluation will take place privately.</LI>
+	<LI>Show up.  Ask questions.  Be engaged.  We don't care if you know it all about what you are implementing.  We care about you contributing to open source.  You learn.  We learn.  Win-win.</LI>
+	<LI>Enjoy it!  Contributing to open source can open some amazing doors for your career.</LI>
+</UL>
+<H1><A name="GSOC-References"></A>References</H1>
+
+
+<P><A href="http://www.lucidimagination.com/search/?%3DGSOC#/p:mahout" class="external-link" rel="nofollow">Mahout Mail Archives about GSOC</A></P>
+
+<P><A href="http://code.google.com/soc/" class="external-link" rel="nofollow">GSOC Home</A></P>
+
+<P><A href="http://socghop.appspot.com/document/show/gsoc_program/google/gsoc2010/faqs" class="external-link" rel="nofollow">GSOC FAQ</A></P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* Google Analytics command queue; reuses an existing _gaq if one is already defined */
+  _gaq.push(['_setAccount', 'UA-17359171-1']); /* presumably the Analytics property id these pages report to */
+  _gaq.push(['_setDomainName', 'none']); /* NOTE(review): 'none' plus _setAllowLinker looks like a cross-domain setup; confirm against the ga.js docs */
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']); /* queues the '_trackPageview' command */
+
+  (function() { /* builds a script element for ga.js and inserts it into the document */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* 'ssl' host on https pages, 'www' host otherwise */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); /* placed before the first script element in the document */
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file



Mime
View raw message