mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1243022 [16/38] - in /mahout/site/new_website: ./ MAHOUT/ MAHOUT/2010/ MAHOUT/2010/09/ MAHOUT/2010/09/14/ MAHOUT/2011/ MAHOUT/2011/10/ MAHOUT/2011/10/21/ MAHOUT/books-tutorials-and-talks.data/ MAHOUT/books-tutorials-talks.data/ MAHOUT/book...
Date Sat, 11 Feb 2012 10:22:31 GMT
Added: mahout/site/new_website/MAHOUT/lda-commandline.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/lda-commandline.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/lda-commandline.html (added)
+++ mahout/site/new_website/MAHOUT/lda-commandline.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,197 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>lda-commandline</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="quickstart.html" title="Quickstart">Quickstart</A>&nbsp;&gt;&nbsp;<A href="clusteringyourdata.html" title="ClusteringYourData">ClusteringYourData</A>&nbsp;&gt;&nbsp;<A href="" title="lda-commandline">lda-commandline</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">lda-commandline</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=21792186">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=21792186">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=21792186">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=21792186">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=21792186">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=21792186">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="lda-commandline-RunningLatentDirichletAllocationfromtheCommandLine"></A>Running Latent Dirichlet Allocation from the Command Line</H1>
+<P>Mahout's LDA can be launched from the same command line invocation whether you are running on a single machine in stand-alone mode or on a larger Hadoop cluster. The difference is determined by the $HADOOP_HOME and $HADOOP_CONF_DIR environment variables. If both are set to an operating Hadoop cluster on the target machine then the invocation will run LDA on that cluster. If either of the environment variables is missing then the stand-alone Hadoop configuration will be invoked instead.</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+./bin/mahout lda &lt;OPTIONS&gt;
+</PRE>
+</DIV></DIV>
+
+<UL>
+	<LI>In $MAHOUT_HOME/, build the jar containing the job (mvn install). The job will be generated in $MAHOUT_HOME/core/target/ and its name will contain the Mahout version number. For example, when using the Mahout 0.3 release, the job will be mahout-core-0.3.job</LI>
+</UL>
+
+
+
+<H2><A name="lda-commandline-Testingitononesinglemachinew%2Focluster"></A>Testing it on one single machine w/o cluster</H2>
+
+<UL>
+	<LI>Put the data: cp &lt;PATH TO DATA&gt; testdata</LI>
+	<LI>Run the Job:
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+./bin/mahout lda -i testdata &lt;OTHER OPTIONS&gt;
+</PRE>
+</DIV></DIV></LI>
+</UL>
+
+
+<H2><A name="lda-commandline-Runningitonthecluster"></A>Running it on the cluster</H2>
+
+<UL>
+	<LI>(As needed) Start up Hadoop: $HADOOP_HOME/bin/start-all.sh</LI>
+	<LI>Put the data: $HADOOP_HOME/bin/hadoop fs -put &lt;PATH TO DATA&gt; testdata</LI>
+	<LI>Run the Job:
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+export HADOOP_HOME=&lt;Hadoop Home Directory&gt;
+export HADOOP_CONF_DIR=$HADOOP_HOME/conf
+./bin/mahout lda -i testdata &lt;OTHER OPTIONS&gt;
+</PRE>
+</DIV></DIV></LI>
+	<LI>Get the data out of HDFS and have a look. Use bin/hadoop fs -lsr output to view all outputs.</LI>
+</UL>
+
+
+<H1><A name="lda-commandline-Commandlineoptions"></A>Command line options</H1>
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+  --input (-i) input                      Path to job input directory. Must be  
+                                          a SequenceFile of VectorWritable      
+  --output (-o) output                    The directory pathname <SPAN class="code-keyword">for</SPAN> output.    
+  --numTopics (-k) numTopics              The total number of topics in the     
+                                          corpus                                
+  --numWords (-v) numWords                The total number of words in the      
+                                          corpus (can be approximate, needs to  
+                                          exceed the actual value)              
+  --topicSmoothing (-a) topicSmoothing    Topic smoothing parameter. Default is 
+                                          50/numTopics.                         
+  --maxIter (-x) maxIter                  The maximum number of iterations.     
+  --maxRed (-r) maxRed                    The number of reduce tasks. Defaults  
+                                          to 2                                  
+  --overwrite (-ow)                       If present, overwrite the output      
+                                          directory before running job          
+  --help (-h)                             Print out help                        
+</PRE>
+</DIV></DIV>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/llr-log-likelihood-ratio.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/llr-log-likelihood-ratio.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/llr-log-likelihood-ratio.html (added)
+++ mahout/site/new_website/MAHOUT/llr-log-likelihood-ratio.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,146 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>LLR - Log-likelihood Ratio</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="glossary.html" title="Glossary">Glossary</A>&nbsp;&gt;&nbsp;<A href="" title="LLR - Log-likelihood Ratio">LLR - Log-likelihood Ratio</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">LLR - Log-likelihood Ratio</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=10846339">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=10846339">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=10846339">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=10846339">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=10846339">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=10846339">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>The likelihood ratio test is used to compare the fit of two models, one of which is nested within the other.</P>
+
+<P>In the context of machine learning and the Mahout project in particular, the term LLR is usually meant to refer to a test of significance for two binomial distributions, also known as the G squared statistic.  This is a special case of the multinomial test and is closely related to mutual information.  The value of this statistic is not normally used in this context as a true frequentist test of significance since there would be obvious and dreadful problems to do with multiple comparisons, but rather as a heuristic score to order pairs of items with the most interestingly connected items having higher scores.  In this usage, the LLR has proven very useful for discriminating pairs of features that have interesting degrees of cooccurrence and those that do not with usefully small false positive and false negative rates.  The LLR is typically far more suitable in the case of small counts than many other measures such as Pearson's correlation, Pearson's chi squared statistic or z statistics.  The LLR as stated does not, however, make any use of rating data which can limit its applicability in problems such as the Netflix competition. </P>
+
+<P>The actual value of the LLR is not usually very helpful other than as a way of ordering pairs of items.  As such, it is often used to determine a sparse set of coefficients to be estimated by other means such as TF-IDF.  Since the actual estimation of these coefficients can be done in a way that is independent of the training data such as by general corpus statistics, and since the ordering imposed by the LLR is relatively robust to counting fluctuation, this technique can provide very strong results in very sparse problems where the potential number of features vastly out-numbers the number of training examples and where features are highly interdependent.</P>
+
+<P> See Also: </P>
+<UL>
+	<LI><A href="http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html" class="external-link" rel="nofollow">http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html</A></LI>
+	<LI><A href="http://en.wikipedia.org/wiki/G-test" class="external-link" rel="nofollow">http://en.wikipedia.org/wiki/G-test</A></LI>
+	<LI><A href="http://en.wikipedia.org/wiki/Likelihood-ratio_test" class="external-link" rel="nofollow">http://en.wikipedia.org/wiki/Likelihood-ratio_test</A></LI>
+</UL>
+
+
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/locally-weighted-linear-regression.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/locally-weighted-linear-regression.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/locally-weighted-linear-regression.html (added)
+++ mahout/site/new_website/MAHOUT/locally-weighted-linear-regression.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,139 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Locally Weighted Linear Regression</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Locally Weighted Linear Regression">Locally Weighted Linear Regression</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Locally Weighted Linear Regression</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75680">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75680">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75680">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75680">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75680">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75680">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          
+<H1><A name="LocallyWeightedLinearRegression-LocallyWeightedLinearRegression"></A>Locally Weighted Linear Regression</H1>
+
+<P>Model-based methods, such as SVM, Naive Bayes and the mixture of Gaussians, use the data to build a parameterized model. After training, the model is used for predictions and the data are generally discarded. In contrast, &quot;memory-based&quot; methods are non-parametric approaches that explicitly retain the training data, and use it each time a prediction needs to be made. Locally weighted regression (LWR) is a memory-based method that performs a regression around a point of interest using only training data that are &quot;local&quot; to that point. Source: <A href="http://www.cs.cmu.edu/afs/cs/project/jair/pub/volume4/cohn96a-html/node7.html" class="external-link" rel="nofollow">http://www.cs.cmu.edu/afs/cs/project/jair/pub/volume4/cohn96a-html/node7.html</A></P>
+
+<H2><A name="LocallyWeightedLinearRegression-Strategyforparallelregression"></A>Strategy for parallel regression</H2>
+
+<H2><A name="LocallyWeightedLinearRegression-Designofpackages"></A>Design of packages</H2>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/logistic-regression.data/sgd-class-hierarchy.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/logistic-regression.data/sgd-class-hierarchy.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/logistic-regression.data/sgd-class-hierarchy.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/logistic-regression.data/sgd-class-hierarchy.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/logistic-regression.data/sgd-class-hierarchy.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/logistic-regression.data/sgd-class-hierarchy.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/logistic-regression.data/vector-class-hierarchy.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/logistic-regression.data/vector-class-hierarchy.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/logistic-regression.data/vector-class-hierarchy.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/logistic-regression.data/vector-class-hierarchy.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/logistic-regression.data/vector-class-hierarchy.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/logistic-regression.data/vector-class-hierarchy.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/logistic-regression.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/logistic-regression.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/logistic-regression.html (added)
+++ mahout/site/new_website/MAHOUT/logistic-regression.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,203 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init() */
+      /* init(): invoked from the BODY onload handler; fills in the search form and collapses the 'children' section when one exists. */
+      function init() {
+        /* Search form initialization: copy the current hostname into the hidden 'domains' and 'sitesearch' inputs (presumably to scope the Google search to this site). */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the 'hide'/'show' toggles and the 'children' element (through document.all when that collection exists, otherwise getElementById), then start collapsed. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): 'show' and 'hide' are not null-checked before use */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+      /* showChildren(): expands 'children' and leaves only the 'hide' toggle visible. Uses the globals assigned by init() without null checks. */
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+      /* hideChildren(): collapses 'children' and leaves only the 'show' toggle visible. Uses the globals assigned by init() without null checks. */
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Logistic Regression</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Logistic Regression">Logistic Regression</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Logistic Regression</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75687">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75687">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75687">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75687">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75687">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75687">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="LogisticRegression-LogisticRegression%28SGD%29"></A>Logistic Regression (SGD)</H1>
+
+<P>Logistic regression is a model used for prediction of the probability of occurrence of an event. It makes use of several predictor variables that may be either numerical or categorical.</P>
+
+<P>Logistic regression is the standard industry workhorse that underlies many production fraud detection and advertising quality and targeting products.  The Mahout implementation uses Stochastic Gradient Descent (SGD) to allow large training sets to be used.</P>
+
+<P>For a more detailed analysis of the approach, have a look at the thesis of Paul Komarek:</P>
+
+<P><A href="http://www.autonlab.org/autonweb/14709/version/4/part/5/data/komarek:lr_thesis.pdf?branch=main&amp;language=en" class="external-link" rel="nofollow">http://www.autonlab.org/autonweb/14709/version/4/part/5/data/komarek:lr_thesis.pdf?branch=main&amp;language=en</A></P>
+
+<P>See MAHOUT-228 for the main JIRA issue for SGD.</P>
+
+
+<H2><A name="LogisticRegression-Parallelizationstrategy"></A>Parallelization strategy</H2>
+
+<P>The bad news is that SGD is an inherently sequential algorithm.  The good news is that it is blazingly fast and thus it is not a problem for Mahout's implementation to handle training sets of tens of millions of examples.  With the down-sampling typical in many data-sets, this is equivalent to a dataset with billions of raw training examples.</P>
+
+<P>The SGD system in Mahout is an online learning algorithm which means that you can learn models in an incremental fashion and that you can do performance testing as your system runs.  Often this means that you can stop training when a model reaches a target level of performance.  The SGD framework includes classes to do on-line evaluation using cross validation (the CrossFoldLearner) and an evolutionary system to do learning hyper-parameter optimization on the fly (the AdaptiveLogisticRegression).  The AdaptiveLogisticRegression system makes heavy use of threads to increase machine utilization.  The way it works is that it runs 20 CrossFoldLearners in separate threads, each with slightly different learning parameters.  As better settings are found, these new settings are propagated to the other learners.</P>
+
+<H2><A name="LogisticRegression-Designofpackages"></A>Design of packages</H2>
+
+<P>There are three packages that are used in Mahout's SGD system.  These include</P>
+
+<UL>
+	<LI>The vector encoding package (found in org.apache.mahout.vectorizer.encoders)</LI>
+</UL>
+
+
+<UL>
+	<LI>The SGD learning package (found in org.apache.mahout.classifier.sgd)</LI>
+</UL>
+
+
+<UL>
+	<LI>The evolutionary optimization system (found in org.apache.mahout.ep)</LI>
+</UL>
+
+
+<H3><A name="LogisticRegression-Featurevectorencoding"></A>Feature vector encoding</H3>
+
+<P>Because the SGD algorithms need to have fixed length feature vectors and because it is a pain to build a dictionary ahead of time, most SGD applications use the hashed feature vector encoding system that is rooted at FeatureVectorEncoder.</P>
+
+<P>The basic idea is that you create a vector, typically a RandomAccessSparseVector, and then you use various feature encoders to progressively add features to that vector.  The size of the vector should be large enough to avoid feature collisions as features are hashed.</P>
+
+<P>There are specialized encoders for a variety of data types.  You can normally encode either a string representation of the value you want to encode or you can encode a byte level representation to avoid string conversion.  In the case of ContinuousValueEncoder and ConstantValueEncoder, it is also possible to encode a null value and pass the real value in as a weight.  This avoids numerical parsing entirely in case you are getting your training data from a system like Avro.</P>
+
+<P>Here is a class diagram for the encoders package:</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="logistic-regression.data/vector-class-hierarchy.png" style="border: 1px solid black"></SPAN></P>
+
+<H3><A name="LogisticRegression-SGDLearning"></A>SGD Learning</H3>
+
+<P>For the simplest applications, you can construct an OnlineLogisticRegression and be off and running.  Typically, though, it is nice to have running estimates of performance on held out data.  To do that, you should use a CrossFoldLearner which keeps a stable of five (by default) OnlineLogisticRegression objects.  Each time you pass a training example to a CrossFoldLearner, it passes this example to all but one of its children as training and passes the example to the last child to evaluate current performance.  The children are used for evaluation in a round-robin fashion so, if you are using the default 5 way split, all of the children get 80% of the training data for training and get 20% of the data for evaluation.</P>
+
+<P>To avoid the pesky need to configure learning rates, regularization parameters and annealing schedules, you can use the AdaptiveLogisticRegression.  This class maintains a pool of CrossFoldLearners and adapts learning rates and regularization on the fly so that you don't have to.</P>
+
+<P>Here is a class diagram for the classifiers.sgd package.  As you can see, the number of twiddlable knobs is pretty large.  For some examples, see the TrainNewsGroups example code.</P>
+
+<P><SPAN class="image-wrap" style=""><IMG src="logistic-regression.data/sgd-class-hierarchy.png" style="border: 1px solid black"></SPAN></P>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* Google Analytics command queue; reuses an existing _gaq if one is already defined */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+  /* Loader: create an async script element for ga.js (SSL host when the page is served over https, www host otherwise) and insert it before the first script element in the document. */
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/machine-learning-resources.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/machine-learning-resources.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/machine-learning-resources.html (added)
+++ mahout/site/new_website/MAHOUT/machine-learning-resources.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,154 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init() */
+      /* init(): invoked from the BODY onload handler; fills in the search form and collapses the 'children' section when one exists. */
+      function init() {
+        /* Search form initialization: copy the current hostname into the hidden 'domains' and 'sitesearch' inputs (presumably to scope the Google search to this site). */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the 'hide'/'show' toggles and the 'children' element (through document.all when that collection exists, otherwise getElementById), then start collapsed. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): 'show' and 'hide' are not null-checked before use */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+      /* showChildren(): expands 'children' and leaves only the 'hide' toggle visible. Uses the globals assigned by init() without null checks. */
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+      /* hideChildren(): collapses 'children' and leaves only the 'show' toggle visible. Uses the globals assigned by init() without null checks. */
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Machine Learning Resources</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Machine Learning Resources">Machine Learning Resources</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Machine Learning Resources</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27830446">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27830446">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27830446">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27830446">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27830446">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27830446">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H2><A name="MachineLearningResources-MachineLearningingeneral"></A>Machine Learning in general</H2>
+<UL>
+	<LI><A href="http://www.ml-class.org/" class="external-link" rel="nofollow">Machine Learning Videos</A> by Andrew Ng</LI>
+</UL>
+
+
+<H2><A name="MachineLearningResources-AboutMahout"></A>About Mahout</H2>
+<UL>
+	<LI><A href="http://www.manning.com/owen" class="external-link" rel="nofollow">Mahout in Action</A> by Sean Owen, et al.</LI>
+</UL>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* Google Analytics command queue; reuses an existing _gaq if one is already defined */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+  /* Loader: create an async script element for ga.js (SSL host when the page is served over https, www host otherwise) and insert it before the first script element in the document. */
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/mahout-benchmarks.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/mahout-benchmarks.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/mahout-benchmarks.html (added)
+++ mahout/site/new_website/MAHOUT/mahout-benchmarks.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,247 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init() */
+      /* init(): invoked from the BODY onload handler; fills in the search form and collapses the 'children' section when one exists. */
+      function init() {
+        /* Search form initialization: copy the current hostname into the hidden 'domains' and 'sitesearch' inputs (presumably to scope the Google search to this site). */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the 'hide'/'show' toggles and the 'children' element (through document.all when that collection exists, otherwise getElementById), then start collapsed. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): 'show' and 'hide' are not null-checked before use */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+      /* showChildren(): expands 'children' and leaves only the 'hide' toggle visible. Uses the globals assigned by init() without null checks. */
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+      /* hideChildren(): collapses 'children' and leaves only the 'show' toggle visible. Uses the globals assigned by init() without null checks. */
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Mahout Benchmarks</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="Mahout Benchmarks">Mahout Benchmarks</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Mahout Benchmarks</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=21791409">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=21791409">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=21791409">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=21791409">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=21791409">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=21791409">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="MahoutBenchmarks-Introduction"></A>Introduction</H1>
+
+<P>TODO:  YMMV</P>
+
+<H1><A name="MahoutBenchmarks-Recommenders"></A>Recommenders</H1>
+
+<H2><A name="MahoutBenchmarks-ARuleofThumb"></A>A Rule of Thumb</H2>
+
+<P>100M preferences are about the data set size where non-distributed recommenders will outgrow a normal-sized machine (32-bit, &lt;= 4GB RAM). Your mileage will vary significantly with the nature of the data.</P>
+
+<H2><A name="MahoutBenchmarks-Distributedrecommendervs.Wikipedialinks%28May272010%29"></A>Distributed recommender vs. Wikipedia links (May 27 2010)</H2>
+
+<P>From the mailing list:</P>
+
+<P>I just finished running a set of recommendations based on the Wikipedia link graph, for book purposes (yeah, it's unconventional). I ran on my laptop, but it ought to be crudely representative of how it runs in a real cluster.</P>
+
+<P>The input is 1058MB as a text file, and contains 130M article-article associations, from 5.7M articles to 3.8M distinct articles (&quot;users&quot; and &quot;items&quot;, respectively). I estimate cost based on Amazon's North<BR>
+American small Linux-based instance pricing of $0.085/hour. I ran on a dual-core laptop with plenty of RAM, allowing 1GB per worker, so this is valid.</P>
+
+<P>In this run, I run recommendations for all 5.7M &quot;users&quot;. You can certainly run for any subset of all users of course.</P>
+
+<P>Phase 1 (Item ID to item index mapping)<BR>
+29 minutes CPU time<BR>
+$0.05<BR>
+60MB output</P>
+
+<P>Phase 2 (Create user vectors)<BR>
+88 minutes CPU time<BR>
+$0.13<BR>
+Output: 1159MB</P>
+
+<P>Phase 3 (Count co-occurrence)<BR>
+77 hours CPU time<BR>
+$6.54<BR>
+Output: 23.6GB</P>
+
+<P>Phase 4 (Partial multiply prep)<BR>
+10.5 hours CPU time<BR>
+$0.90<BR>
+Output: 24.6GB</P>
+
+<P>Phase 5 (Aggregate and recommend)<BR>
+about 600 hours<BR>
+about $51.00<BR>
+about 10GB<BR>
+(I estimated these rather than let it run at home for days!)</P>
+
+
+<P>Note that phases 1 and 3 may be run less frequently, and need not be run every time. But the cost is dominated by the last step, which is most of the work. I've ignored storage costs.</P>
+
+<P>This implies a cost of $0.01 (or about 8 instance-minutes) per 1,000 user recommendations. That's not bad if, say, you want to update recs for your site's 100,000 daily active users for a dollar.</P>
+
+<P>There are several levers one could pull internally to sacrifice accuracy for speed, but it's currently set to pretty normal values. So this is just one possibility.</P>
+
+<P>Now that's not terrible, but it is about 8x more computing than would be needed by a non-distributed implementation <B>if</B> you could fit the whole data set into a very large instance's memory, which is still possible at this scale but needs a pretty big instance. That's a very apples-to-oranges comparison of course; different algorithms, entirely different environments. This is about the amount of overhead I'd expect from distributing &ndash; interesting to note how non-trivial it is.</P>
+
+<H2><A name="MahoutBenchmarks-Nondistributedrecommendervs.KDDCupdataset%28March2011%29"></A>Non-distributed recommender vs. KDD Cup data set (March 2011)</H2>
+
+<P>(From the user@mahout.apache.org mailing list)</P>
+
+<P>I've been test-driving a simple application of Mahout recommenders (the non-distributed kind) on Amazon EC2 on the new Yahoo KDD Cup data set (kddcup.yahoo.com).</P>
+
+<P>In the spirit of open-source, like I mentioned, I'm committing the extra code to mahout-examples that can be used to run a Recommender on the input and output the right format. And, I'd like to publish the rough timings too. Find all the source in org.apache.mahout.cf.taste.example.kddcup</P>
+
+<H3><A name="MahoutBenchmarks-Track1"></A>Track 1</H3>
+
+<UL>
+	<LI>m2.2xlarge instance, 34.2GB RAM / 4 cores</LI>
+	<LI>Steady state memory consumption: ~19GB</LI>
+	<LI>Computation time: 30 hours (wall-clock time)</LI>
+	<LI>CPU time per user: ~0.43 sec</LI>
+	<LI>Cost on EC2: $34.20 <IMG class="emoticon" src="https://cwiki.apache.org/confluence/images/icons/emoticons/warning.gif" height="16" width="16" align="absmiddle" alt="" border="0"></LI>
+</UL>
+
+
+<P>(Helpful hint on cost I realized after the fact: you can almost surely get spot instances for cheaper. The maximum price this sort of instance has gone for as a spot instance is about $0.60/hour, vs &quot;retail price&quot; of $1.14/hour.)</P>
+
+<P>Resulted in an RMSE of 29.5618 (the rating scale is 0-100), which is only good enough for 29th place at the moment. Not terrible for &quot;out of the box&quot; performance &ndash; it's just using an item-based recommender with uncentered cosine similarity. But not really good in absolute terms. A winning solution is going to try to factor in time, and apply more sophisticated techniques. The best RMSE so far is about 23.</P>
+
+<H3><A name="MahoutBenchmarks-Track2"></A>Track 2</H3>
+
+<UL>
+	<LI>c1.xlarge instance: 7GB RAM / 8 cores</LI>
+	<LI>Steady state memory consumption: ~3.8GB</LI>
+	<LI>Computation time: 4.1 hours (wall-clock time)</LI>
+	<LI>CPU time per user: ~1.1 sec</LI>
+	<LI>Cost on EC2: $3.20</LI>
+</UL>
+
+
+<P>For this I bothered to write a simplistic item-item similarity metric to take into account the additional info that is available: track, artist, album, genre. The result was comparatively better: 17.92% error rate, good enough for 4th place at the moment.</P>
+
+<P>Of course, the next task is to put this through the actual distributed processing &ndash; that's really the appropriate solution.</P>
+
+<P>This shows you can still tackle fairly impressive scale with a non-distributed solution. These results suggest that the largest instances available from EC2 would accommodate almost 1 billion ratings in memory. However, at that scale running a user's full recommendations would easily be measured in seconds, not milliseconds.</P>
+
+<H1><A name="MahoutBenchmarks-Clustering"></A>Clustering</H1>
+
+<P>See <A href="https://issues.apache.org/jira/browse/MAHOUT-588" class="external-link" rel="nofollow">MAHOUT-588</A></P>
+
+<H1><A name="MahoutBenchmarks-Classification"></A>Classification</H1>
+
+<H1><A name="MahoutBenchmarks-FrequentPatternsetMining"></A>Frequent Patternset Mining</H1>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* reuse an already-defined _gaq command queue, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']); /* queue the account id command */
+  _gaq.push(['_setDomainName', 'none']); /* NOTE(review): meaning of 'none' is defined by ga.js - confirm against the ga.js reference */
+  _gaq.push(['_setAllowLinker', true]); /* NOTE(review): presumably paired with the domain setting above - confirm */
+  _gaq.push(['_trackPageview']); /* queue a page-view command for this page */
+  /* Loader: create an async script element for ga.js and insert it before the first script element in the document. */
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* 'https://ssl' host on https pages, 'http://www' host otherwise */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/mahout-collections.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/mahout-collections.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/mahout-collections.html (added)
+++ mahout/site/new_website/MAHOUT/mahout-collections.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,160 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;      /* element with id 'hide'; assigned by init(), read by showChildren()/hideChildren() */
+      var show = null;      /* element with id 'show'; assigned by init(), read by showChildren()/hideChildren() */
+      var children = null;  /* element named/id'd 'children'; assigned by init(), stays null if the page has none */
+
+      function init() { /* Page setup; invoked from the BODY onload attribute. Takes no arguments and returns nothing. */
+        /* Search form initialization: copy this page's hostname into the 'domains' and 'sitesearch' fields of the 'search' form. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+        /* NOTE(review): presumably the hostname values scope the Google query to this site - confirm against Google's search parameters. */
+        /* Children initialization: cache the toggle elements in the script-level variables. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it (NOTE(review): presumably a legacy-IE path), else getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* initial state: children hidden, 'show' control visible, 'hide' control hidden */
+          children.style.display = 'none';
+          show.style.display = 'inline'; /* show and hide are dereferenced without their own null checks */
+          hide.style.display = 'none';
+        } /* no element with id 'children', 'show' or 'hide' appears in this page's markup, so this branch is skipped here */
+      }
+
+      function showChildren() { /* Expand state: display the children element as a block, hide the 'show' control, reveal the 'hide' control. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      } /* no null checks: relies on init() having found all three elements; no caller is visible in this page */
+
+      function hideChildren() { /* Collapse state: hide the children element, reveal the 'show' control, hide the 'hide' control. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      } /* no null checks: relies on init() having found all three elements; no caller is visible in this page */
+    </SCRIPT>
+    <TITLE>mahout-collections</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="collections.html" title="Collections">Collections</A>&nbsp;&gt;&nbsp;<A href="" title="mahout-collections">mahout-collections</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">mahout-collections</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=14814825">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=14814825">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=14814825">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=14814825">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=14814825">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=14814825">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="mahout-collections-Introduction"></A>Introduction</H1>
+
+<P>The Mahout Collections library is a set of container classes that address some limitations of the standard collections in Java. <A href="http://domino.research.ibm.com/comm/research_people.nsf/pages/sevitsky.pubs.html/$FILE/oopsla08%20memory-efficient%20java%20slides.pdf" class="external-link" rel="nofollow">This presentation</A> describes a number of performance problems with the standard collections. </P>
+
+<P>Mahout Collections addresses two of the more glaring problems: the lack of support for primitive types and the lack of open addressing.</P>
+
+<H1><A name="mahout-collections-PrimitiveTypes"></A>Primitive Types</H1>
+
+<P>The most visible feature of Mahout Collections is the large collection of primitive type collections. Given Java's asymmetrical support for the primitive types, the only efficient way to handle them is with many classes. So, there are ArrayList-like containers for all of the primitive types, and hash maps for all the useful combinations of primitive type and object keys and values.</P>
+
+<P>These classes do not, in general, implement interfaces from <TT>java.util</TT>. Even when the <TT>java.util</TT> interfaces could be type-compatible, they tend to include requirements that are not consistent with efficient use of primitive types.</P>
+
+<H1><A name="mahout-collections-OpenAddressing"></A>Open Addressing</H1>
+
+<P>All of the sets and maps in Mahout Collections are open-addressed hash tables. Open addressing has a much smaller memory footprint than chaining. Since the purpose of these collections is to avoid the memory cost of autoboxing, open addressing is a consistent design choice.</P>
+
+<H1><A name="mahout-collections-Sets"></A>Sets</H1>
+
+<P>Mahout Collections includes open hash sets. Unlike <TT>java.util</TT>, a set is not a recycled hash table; the sets are separately implemented and do not have any additional storage usage for unused keys.</P>
+
+<H1><A name="mahout-collections-CreditwhereCreditisdue"></A>Credit where Credit is due</H1>
+
+<P>The implementation of Mahout Collections is derived from <A href="http://acs.lbl.gov/~hoschek/colt/" class="external-link" rel="nofollow">Cern Colt</A>.</P>
+
+
+
+
+
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* reuse an already-defined _gaq command queue, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']); /* queue the account id command */
+  _gaq.push(['_setDomainName', 'none']); /* NOTE(review): meaning of 'none' is defined by ga.js - confirm against the ga.js reference */
+  _gaq.push(['_setAllowLinker', true]); /* NOTE(review): presumably paired with the domain setting above - confirm */
+  _gaq.push(['_trackPageview']); /* queue a page-view command for this page */
+  /* Loader: create an async script element for ga.js and insert it before the first script element in the document. */
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* 'https://ssl' host on https pages, 'http://www' host otherwise */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/mahout-mailing-lists.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/mahout-mailing-lists.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/mahout-mailing-lists.html (added)
+++ mahout/site/new_website/MAHOUT/mahout-mailing-lists.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,189 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;      /* element with id 'hide'; assigned by init(), read by showChildren()/hideChildren() */
+      var show = null;      /* element with id 'show'; assigned by init(), read by showChildren()/hideChildren() */
+      var children = null;  /* element named/id'd 'children'; assigned by init(), stays null if the page has none */
+
+      function init() { /* Page setup; invoked from the BODY onload attribute. Takes no arguments and returns nothing. */
+        /* Search form initialization: copy this page's hostname into the 'domains' and 'sitesearch' fields of the 'search' form. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+        /* NOTE(review): presumably the hostname values scope the Google query to this site - confirm against Google's search parameters. */
+        /* Children initialization: cache the toggle elements in the script-level variables. */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it (NOTE(review): presumably a legacy-IE path), else getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* initial state: children hidden, 'show' control visible, 'hide' control hidden */
+          children.style.display = 'none';
+          show.style.display = 'inline'; /* show and hide are dereferenced without their own null checks */
+          hide.style.display = 'none';
+        } /* no element with id 'children', 'show' or 'hide' appears in this page's markup, so this branch is skipped here */
+      }
+
+      function showChildren() { /* Expand state: display the children element as a block, hide the 'show' control, reveal the 'hide' control. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      } /* no null checks: relies on init() having found all three elements; no caller is visible in this page */
+
+      function hideChildren() { /* Collapse state: hide the children element, reveal the 'show' control, hide the 'hide' control. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      } /* no null checks: relies on init() having found all three elements; no caller is visible in this page */
+    </SCRIPT>
+    <TITLE>Mahout Mailing Lists</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="Mahout Mailing Lists">Mahout Mailing Lists</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Mahout Mailing Lists</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=74919">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=74919">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=74919">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=74919">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=74919">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=74919">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Communication at Mahout happens online via mailing lists. We have a user as well as a dev list for discussion. In addition there is a commit list so we are able to monitor what happens on the wiki and in svn.</P>
+
+<H2><A name="MahoutMailingLists-Mailinglists"></A>Mailing lists</H2>
+
+<H3><A name="MahoutMailingLists-MahoutUserList"></A>Mahout User List</H3>
+
+<P>This list is for users of Mahout to ask questions, share knowledge, and discuss issues. Do send mail to this list with usage and configuration questions and problems. Also, please send questions to this list to verify your problem before filing issues in JIRA. </P>
+
+<UL>
+	<LI><A href="mailto:mahout-user-subscribe@apache.org" class="external-link" rel="nofollow">Subscribe</A></LI>
+	<LI><A href="mailto:mahout-user-unsubscribe@apache.org" class="external-link" rel="nofollow">Unsubscribe</A></LI>
+</UL>
+
+
+<H3><A name="MahoutMailingLists-MahoutDeveloperList"></A>Mahout Developer List</H3>
+
+<P>This is the list where participating developers of the Mahout project meet and discuss issues concerning Mahout internals, code changes/additions, etc. Do not send mail to this list with usage questions or configuration questions and problems. </P>
+
+<P>Discussion list: </P>
+
+<UL>
+	<LI><A href="mailto:mahout-dev-subscribe@apache.org" class="external-link" rel="nofollow">Subscribe</A> &ndash; Do not send mail to this list with usage questions or configuration questions and problems.</LI>
+	<LI><A href="mailto:mahout-dev-unsubscribe@apache.org" class="external-link" rel="nofollow">Unsubscribe</A></LI>
+</UL>
+
+
+<P>Commit notifications: </P>
+
+<UL>
+	<LI><A href="mailto:mahout-commits-subscribe@apache.org" class="external-link" rel="nofollow">Subscribe</A></LI>
+	<LI><A href="mailto:mahout-commits-unsubscribe@apache.org" class="external-link" rel="nofollow">Unsubscribe</A></LI>
+</UL>
+
+
+<H2><A name="MahoutMailingLists-Archives"></A>Archives</H2>
+
+<H3><A name="MahoutMailingLists-OfficialApacheArchive"></A>Official Apache Archive</H3>
+
+<UL>
+	<LI><A href="http://mail-archives.apache.org/mod_mbox/mahout-dev/" class="external-link" rel="nofollow">http://mail-archives.apache.org/mod_mbox/mahout-dev/</A></LI>
+	<LI><A href="http://mail-archives.apache.org/mod_mbox/mahout-user/" class="external-link" rel="nofollow">http://mail-archives.apache.org/mod_mbox/mahout-user/</A></LI>
+</UL>
+
+
+<UL>
+	<LI><A href="http://mahout.apache.org/mail/" class="external-link" rel="nofollow">Mbox Archive</A></LI>
+</UL>
+
+
+<P>Archives from before Mahout became an Apache top-level project:</P>
+
+<UL>
+	<LI><A href="http://mail-archives.apache.org/mod_mbox/lucene-mahout-dev/" class="external-link" rel="nofollow">http://mail-archives.apache.org/mod_mbox/lucene-mahout-dev/</A></LI>
+	<LI><A href="http://mail-archives.apache.org/mod_mbox/lucene-mahout-user/" class="external-link" rel="nofollow">http://mail-archives.apache.org/mod_mbox/lucene-mahout-user/</A></LI>
+</UL>
+
+
+<UL>
+	<LI><A href="http://lucene.apache.org/mail/" class="external-link" rel="nofollow">Mbox Archive</A></LI>
+</UL>
+
+
+<H3><A name="MahoutMailingLists-ExternalArchives"></A>External Archives</H3>
+
+<UL>
+	<LI><A href="http://www.lucidimagination.com/search" class="external-link" rel="nofollow">http://www.lucidimagination.com/search</A> - Search the entire Lucene ecosystem, including Mahout (archives, JIRA, etc.)  Powered by Solr/Lucene.</LI>
+	<LI><A href="http://mahout.markmail.org/" class="external-link" rel="nofollow">MarkMail</A></LI>
+	<LI><A href="http://www.nabble.com/Apache-Mahout-f32040.html" class="external-link" rel="nofollow">Nabble</A></LI>
+	<LI><A href="http://dir.gmane.org/gmane.comp.apache.mahout.user" class="external-link" rel="nofollow">Gmane</A></LI>
+</UL>
+
+
+<P>Please note the inclusion of a link to an archive does not imply an endorsement of that company by any of the committers of Mahout, the Lucene PMC, or the Apache Software Foundation. Each archive owner is solely responsible for the contents and availability of their archive.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+  </BODY>
+</HTML>
\ No newline at end of file



Mime
View raw message