mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1243022 [20/38] - in /mahout/site/new_website: ./ MAHOUT/ MAHOUT/2010/ MAHOUT/2010/09/ MAHOUT/2010/09/14/ MAHOUT/2011/ MAHOUT/2011/10/ MAHOUT/2011/10/21/ MAHOUT/books-tutorials-and-talks.data/ MAHOUT/books-tutorials-talks.data/ MAHOUT/book...
Date Sat, 11 Feb 2012 10:22:31 GMT
Added: mahout/site/new_website/MAHOUT/mean-shift.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/mean-shift.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/mean-shift.html (added)
+++ mahout/site/new_website/MAHOUT/mean-shift.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,173 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('http://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Mean Shift</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="index.html" title="Apache Lucene Mahout">Apache Lucene Mahout</A>&nbsp;&gt;&nbsp;<A href="index.html" title="index">index</A>&nbsp;&gt;&nbsp;<A href="" title="Mean Shift">Mean Shift</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Lucene Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Mean Shift</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="http://cwiki.apache.org/confluence/pages/editpage.action?pageId=81503">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="http://cwiki.apache.org/confluence/pages/editpage.action?pageId=81503">Edit Page</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="http://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=81503">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="http://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=81503">Add Page</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=81503">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="http://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=81503">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="MeanShift-MeanShiftClustering"></A>Mean Shift Clustering</H1>
+
+<P>&quot;Mean Shift: A Robust Approach to Feature Space Analysis&quot; (<A href="http://www.caip.rutgers.edu/riul/research/papers/pdf/mnshft.pdf" class="external-link" rel="nofollow">http://www.caip.rutgers.edu/riul/research/papers/pdf/mnshft.pdf</A>) introduces the geneology of the mean shift custering procedure which dates back to work in pattern recognition in 1975. The paper contains a detailed derivation and several examples of the use of mean shift for image smooting and segmentation. &quot;Mean Shift Clustering&quot; (<A href="http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/TUZEL1/MeanShift.pdf" class="external-link" rel="nofollow">http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/TUZEL1/MeanShift.pdf</A>) presents an overview of the algorithm with a summary of the derivation. An attractive feature of mean shift clustering is that it does not require a-priori knowledge of the number of clusters (as required in k-means) and it will produce arbitrarily-sha
 ped clusters that depend upon the topology of the data (unlike canopy).</P>
+
+<P>The algorithm begins with a set of datapoints, and creates a fixed-radius window for each. It then iterates over each window, calculating a mean shift vector which points in the direction of the maximum increase in the local density function. Then each window is migrated to a new position via the vector and the iteration resumes. Iterations complete when each window has reached a local maximum in the density function and the vector becomes negligable.</P>
+
+<H2><A name="MeanShift-ReferenceImplementation"></A>Reference Implementation</H2>
+
+<P>The implementation introduced by MAHOUT-15 uses modified Canopy Clustering canopies to represent the mean shift windows. </P>
+<UL>
+	<LI>It uses the canopy's T1 distance threshold as the radius of the window, and the canopy's T2 threshold to decide when two canopies have converged and will thereby follow the same path.</LI>
+	<LI>Each canopy contains one or more bound points which are represented using the (internally specified) integer ids of the bound points.</LI>
+	<LI>The algorithm is initialized with a canopy containing each input point.</LI>
+	<LI>During each iteration, every canopy calculates its mean shift vector by summing the canopy centers within its T1 threshold. This value is normalized and the resulting centroid becomes the new canopy center.
+	<UL>
+		<LI>The centers are weighted in proportion to their numbers of bound points (weighted pair-group centroid)</LI>
+	</UL>
+	</LI>
+	<LI>If any canopies are within their T2 thresholds they are merged and their respective bound points are accumulated.</LI>
+	<LI>The iterations complete when each canopy's mean shift vector has a magnitude less than a given termination delta.</LI>
+	<LI>Upon termination, the remaining canopies contain sets of points which are the members of their cluster.</LI>
+</UL>
+
+
+<H2><A name="MeanShift-Map%2FReduceImplementation"></A>Map/Reduce Implementation</H2>
+
+<UL>
+	<LI>Each mapper receives a subset of the canopies for each iteration. It compares each canopy with each one it has already seen and performs the T1 and T2 distance tests using an arbitrary user-supplied DistanceMeasure. The mapper merges canopies within T2 distance, moves each canopy to its new centroid position and outputs the canopy to the reducer with a constant key</LI>
+	<LI>A single reducer coalesces all the canopies from the combiners by performing another clustering iteration on them.</LI>
+	<LI>A driver class manages the iteration and determines when either a maximum number of iterations occur or the termination criteria is reached.</LI>
+</UL>
+
+
+
+<H2><A name="MeanShift-RunningMeanShiftClustering"></A>Running Mean Shift Clustering</H2>
+
+<P>The Mean Shift clustering algorithm may be run using a command-line invocation on MeanShiftCanopyDriver.main or by making a Java call to MeanShiftCanopyDriver.runJob(). Both require several arguments:</P>
+
+<OL>
+	<LI>input: a file path string to a directory containing the input data set a SequenceFile(WritableComparable, VectorWritable). The sequence file <EM>key</EM> is not used.</LI>
+	<LI>output: a file path string to an empty directory which is used for all output from the algorithm.</LI>
+	<LI>measure: the fully-qualified class name of an instance of DistanceMeasure which will be used for the clustering.</LI>
+	<LI>t1: the T1 threshold is used to determine if clusters are close enough to influence each other's next mean calculation.</LI>
+	<LI>t2: the T2 threshold is used to determine when two clusters are close enough to merge.</LI>
+	<LI>convergence: a double value used to determine if the algorithm has converged (clusters have not moved more than the value in the last iteration)</LI>
+	<LI>max-iterations: the maximum number of iterations to run, independent of the convergence specified</LI>
+	<LI>inputIsClusters: a boolean indicating, if true, that the input directory already contains MeanShiftCanopies and no further initialization is needed. If false (the default) input VectorWritables are used to form the initial canopies and these will be written to the clusters-0 directory.</LI>
+	<LI>runClustering: a boolean indicating, if true, that the clustering step is to be executed after clusters have been determined.</LI>
+</OL>
+
+
+<P>After running the algorithm, the output directory will contain:</P>
+<OL>
+	<LI>clusters-N: directories containing SequenceFiles(Text, MeanShiftCanopy) produced by the algorithm for each iteration. The Text <EM>key</EM> is a cluster identifier string.</LI>
+	<LI>clusteredPoints: (if runClustering enabled) a directory containing SequenceFile(IntWritable, WeightedVectorWritable). The IntWritable <EM>key</EM> is the clusterId. The WeightedVectorWritable <EM>value</EM> is a bean containing a double <EM>weight</EM> and a VectorWritable <EM>vector</EM> where the weight indicates the probability that the vector is a member of the cluster. As Mean Shift only produces a single clustering for each point, the weights are all == 1.</LI>
+</OL>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/minhash-clustering.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/minhash-clustering.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/minhash-clustering.html (added)
+++ mahout/site/new_website/MAHOUT/minhash-clustering.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,228 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Minhash Clustering</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Minhash Clustering">Minhash Clustering</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Minhash Clustering</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27826407">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27826407">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27826407">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27826407">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27826407">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27826407">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Minhash clustering performs probabilistic dimension reduction of high dimensional data. The essence of the technique is to hash each item using multiple independent hash functions such that the probability of collision of similar items is higher. Multiple such hash tables can then be constructed to answer near neighbor types of queries efficiently.</P>
+
+<P>There is a MinHashDriver class which works in the TestMinHashClustering unit test. This is not included in the standard driver.props class, but it can be run by specifying the full package name.</P>
+
+<H4><A name="MinhashClustering-RunningMinHashDriverontheReuters21578Collection"></A>Running MinHashDriver on the Reuters-21578 Collection</H4>
+
+<P>There are two ways of doing this:</P>
+
+<H5><A name="MinhashClustering-Runclusterreuters.sh"></A>Run cluster-reuters.sh</H5>
+
+<OL>
+	<LI>cd $MAHOUT_HOME/examples/bin/cluster-reuters.sh  (trunk only)</LI>
+	<LI>Select the Minhash algorithm when prompted.</LI>
+</OL>
+
+
+<H5><A name="MinhashClustering-StepByStep"></A>Step By Step</H5>
+
+<H6><A name="MinhashClustering-1.%26nbsp%3BDownloadtheReuters21578Datasetfrom"></A>1.&nbsp; Download the Reuters-21578 Dataset from <A href="http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz" class="external-link" rel="nofollow">http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz</A> and extract this under /examples/reuters folder.</H6>
+
+
+<P>The Reuters-21578 collection has about 22578 documents in SGML format.&nbsp; These need to be converted to text files to subsequently generate the SequenceFiles and SparseVectors.</P>
+
+<P>To convert the SGML files to Text, we invoke the ExtractReuters utility that comes with Lucene. This creates text files from SGML containing - Title, Date, Body.</P>
+
+<H6><A name="MinhashClustering-2.%26nbsp%3B%26nbsp%3BRuntheReutersextractioncodefromtheexamplesdirectoryasfollows%3A"></A>2.&nbsp;&nbsp; Run the Reuters extraction code from the examples directory as follows:</H6>
+
+<P>mvn &#45;e &#45;q exec:java<BR>
+&#45;Dexec.mainClass=&quot;org.apache.lucene.benchmark.utils.ExtractReuters&quot;<BR>
+&#45;Dexec.args=&quot;reuters/ reuters-extracted/&quot;</P>
+
+<H6><A name="MinhashClustering-3.CreateSequenceFilesfromtheconvertedReutersTextfiles"></A>3. Create SequenceFiles from the converted Reuters Text files</H6>
+
+<P>bin/mahout seqdirectory &#45;c UTF-8 &#45;i examples/reuters-extracted/ &#45;o reuters-seqfiles</P>
+
+<P>This will write the Reuters documents into Sequence files.</P>
+
+
+<H6><A name="MinhashClustering-4.CreateSparseVectorsfromtheSequenceFiles"></A>4. Create SparseVectors from the SequenceFiles</H6>
+
+
+<P>bin/mahout seq2sparse &#45;i &#45;ng 1 reuters-seqfiles/ &#45;o reuters-vectors &#45;ow</P>
+
+<P>The &#45;-ow flag is used to denote whether or not to overwrite<BR>
+the output folder</P>
+
+<P>The &#45;ng flag is the maximum size of NGrams to be selected from collection of documents</P>
+
+<H6><A name="MinhashClustering-5.RuntheMinHashDriveronthegeneratedSparseVectors"></A>5. Run the MinHashDriver on the generated SparseVectors</H6>
+
+<P>bin/mahout org.apache.mahout.clustering.minhash.MinHashDriver &#45;-input reuters-vector/tfidf-vectors/ &#45;o /minhash</P>
+
+<P>The resulting output in /minhash/part-r-00000 would be something like below</P>
+
+<P>97618498-357680743&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-006.sgm-25.txt<BR>
+97618498-357680743&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-007.sgm-660.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-015.sgm-697.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-014.sgm-99.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-009.sgm-705.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-000.sgm-495.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-009.sgm-732.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-010.sgm-473.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-000.sgm-15.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-009.sgm-872.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-010.sgm-547.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-006.sgm-366.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-002.sgm-53.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-000.sgm-569.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-019.sgm-366.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-003.sgm-540.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-019.sgm-154.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-004.sgm-372.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-000.sgm-3.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-002.sgm-935.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-013.sgm-567.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-004.sgm-938.txt<BR>
+97618498-61898030&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-004.sgm-620.txt<BR>
+97618498-92898924&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-018.sgm-316.txt<BR>
+97618498-92898924&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-007.sgm-976.txt<BR>
+97618498-92898924&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-003.sgm-796.txt<BR>
+97618498-92898924&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-006.sgm-176.txt<BR>
+97618498-92898924&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-004.sgm-290.txt<BR>
+97618498-92898924&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; /reut2-004.sgm-248.txt</P>
+
+<P>The first column is the &lt;Cluster-Id&gt; and the second column is &lt;reuters-text-filename&gt;.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/mr-map-reduce.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/mr-map-reduce.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/mr-map-reduce.html (added)
+++ mahout/site/new_website/MAHOUT/mr-map-reduce.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,142 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>MR - Map Reduce</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="glossary.html" title="Glossary">Glossary</A>&nbsp;&gt;&nbsp;<A href="" title="MR - Map Reduce">MR - Map Reduce</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">MR - Map Reduce</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=10846325">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=10846325">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=10846325">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=10846325">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=10846325">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=10846325">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>MapReduce is a framework for processing huge datasets on certain kinds of distributable problems using a large number of computers (nodes), collectively referred to as a cluster. Computational processing can occur on data stored either in a filesystem (unstructured) or within a database (structured).</P>
+
+<P>&nbsp; Also written M/R</P>
+
+
+<P>&nbsp; See Also</P>
+<UL>
+	<LI><A href="http://wiki.apache.org/hadoop/HadoopMapReduce" class="external-link" rel="nofollow">http://wiki.apache.org/hadoop/HadoopMapReduce</A></LI>
+	<LI><A href="http://en.wikipedia.org/wiki/MapReduce" class="external-link" rel="nofollow">http://en.wikipedia.org/wiki/MapReduce</A></LI>
+</UL>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/naivebayes.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/naivebayes.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/naivebayes.html (added)
+++ mahout/site/new_website/MAHOUT/naivebayes.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,160 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>NaiveBayes</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="bayesian.html" title="Bayesian">Bayesian</A>&nbsp;&gt;&nbsp;<A href="" title="NaiveBayes">NaiveBayes</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">NaiveBayes</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75078">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75078">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75078">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75078">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75078">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75078">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="NaiveBayes-NaiveBayes"></A>Naive Bayes</H1>
+
+<P>Naive Bayes is an algorithm that can be used to classify objects into usually binary categories. It is one of the most common learning algorithms in spam filters. Despite its simplicity and rather naive assumptions it has proven to work surprisingly well in practice.</P>
+
+<P>Before applying the algorithm, the objects to be classified need to be represented by numerical features. In the case of e-mail spam each feature might indicate whether some specific word is present or absent in the mail to classify. The algorithm comes in two phases: Learning and application.<BR>
+During learning, a set of feature vectors is given to the algorithm, each vector labeled with the class the object it represents, belongs to. From that it is deduced which combination of features appears with high probability in spam messages. Given this information, during application one can easily compute the probability of a new message being either spam or not.</P>
+
+<P>The algorithm does make several assumptions, that are not true for most datasets, but make computations easier. The worst probably being, that all features of an objects are considered independent. In practice, that means, given the phrase &quot;Statue of Liberty&quot; was already found in a text, does not influence the probability of seeing the phrase &quot;New York&quot; as well.</P>
+
+<H2><A name="NaiveBayes-StrategyforaparallelNaiveBayes"></A>Strategy for a parallel Naive Bayes</H2>
+
+<P>See <A href="https://issues.apache.org/jira/browse/MAHOUT-9" class="external-link" rel="nofollow">https://issues.apache.org/jira/browse/MAHOUT-9</A>.</P>
+
+
+<H2><A name="NaiveBayes-Examples"></A>Examples</H2>
+
+<P><A href="20newsgroups.html" title="20Newsgroups">20Newsgroups</A> - Example code showing how to train and use the Naive Bayes classifier using the 20 Newsgroups data available at <A href="http://people.csail.mit.edu/jrennie/20Newsgroups/" class="external-link" rel="nofollow">http://people.csail.mit.edu/jrennie/20Newsgroups/</A></P>
+        </DIV>
+
+                  <DIV class="tabletitle">
+            Children
+            <SPAN class="smalltext" id="show" style="display: inline;">
+              <A href="javascript:showChildren()">Show Children</A></SPAN>
+            <SPAN class="smalltext" id="hide" style="display: none;">
+              <A href="javascript:hideChildren()">Hide Children</A></SPAN>
+          </DIV>
+          <DIV class="greybox" id="children" style="display: none;">
+                                      <A href="20newsgroups.html" title="20Newsgroups">20Newsgroups</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                      </DIV>
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/neural-network.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/neural-network.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/neural-network.html (added)
+++ mahout/site/new_website/MAHOUT/neural-network.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,139 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Neural Network</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Neural Network">Neural Network</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Neural Network</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75593">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=75593">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75593">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=75593">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75593">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=75593">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="NeuralNetwork-NeuralNetworks"></A>Neural Networks</H1>
+
+<P>Neural Networks are a means for classifying multi dimensional objects. We concentrate on implementing back propagation networks with one hidden layer as these networks have been covered by the <A href="http://www.cs.stanford.edu/people/ang/papers/nips06-mapreducemulticore.pdf" class="external-link" rel="nofollow">2006 NIPS map reduce paper</A>. Those networks are capable of learning not only linear separating hyper planes but arbitrary decision boundaries.</P>
+
+<H2><A name="NeuralNetwork-Strategyforparallelbackpropagationnetwork"></A>Strategy for parallel backpropagation network</H2>
+
+
+<H2><A name="NeuralNetwork-Designofimplementation"></A>Design of implementation</H2>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/online-passive-aggressive.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/online-passive-aggressive.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/online-passive-aggressive.html (added)
+++ mahout/site/new_website/MAHOUT/online-passive-aggressive.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,162 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Online Passive Aggressive</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Online Passive Aggressive">Online Passive Aggressive</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Online Passive Aggressive</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=26120408">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=26120408">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=26120408">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=26120408">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=26120408">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=26120408">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Online Passive Aggressive</P>
+
+<P>Implements <A href="http://www.google.com/url?sa=t&source=web&cd=1&ved=0CCIQFjAA&url=http:%2F%2Fciteseer.ist.psu.edu%2Fviewdoc%2Fdownload%3Bjsessionid%3DF4743238B0EF35EB396A5ABFF1332021%3Fdoi%3D10.1.1.61.5120%26rep%3Drep1%26type%3Dpdf&rct=j&q=online%20passive%20aggressive&ei=elvWTa6jBcfHrQf8o52KBg&usg=AFQjCNGqNjaHyWgT4Z3QrK7hEqSTGM10YQ&sig2=-szWIrzBLoQ52jBER9-I0Q&cad=rja" class="external-link" rel="nofollow">http://www.google.com/url?sa=t&amp;source=web&amp;cd=1&amp;ved=0CCIQFjAA&amp;url=http%3A%2F%2Fciteseer.ist.psu.edu%2Fviewdoc%2Fdownload%3Bjsessionid%3DF4743238B0EF35EB396A5ABFF1332021%3Fdoi%3D10.1.1.61.5120%26rep%3Drep1%26type%3Dpdf&amp;rct=j&amp;q=online%20passive%20aggressive&amp;ei=elvWTa6jBcfHrQf8o52KBg&amp;usg=AFQjCNGqNjaHyWgT4Z3QrK7hEqSTGM10YQ&amp;sig2=-szWIrzBLoQ52jBER9-I0Q&amp;cad=rja</A>.</P>
+
+<P>Use cases:</P>
+
+<P>  When you have many classes that are linearly separable and want a fast online learner to get results quickly.</P>
+
+<P>Pre-requisites:</P>
+
+<P>  Data must be shuffled and normalized either between 0..1 or by mean and standard deviation.</P>
+
+<P>Technical details:</P>
+
+<P>  The training approach taken is to minimize the ranking loss of the correct label vs the incorrect ones. We define this loss as hinge(1 - correct label score + wrong label score) where wrong label score is the score of the highest scoring label that is not the correct label. The hinge function is hinge<IMG class="emoticon" src="https://cwiki.apache.org/confluence/images/icons/emoticons/error.gif" height="16" width="16" align="absmiddle" alt="" border="0"> = x if x &gt; 0, 0 otherwise.</P>
+
+<P>Parameters:</P>
+
+<P>  There is only one - learningRate. You set it to a larger number to converge faster, or a smaller number to be more cautious. The normal way to use it is via cross validation. Good values are (0.1, 1.0, 10.0).</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/online-viterbi.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/online-viterbi.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/online-viterbi.html (added)
+++ mahout/site/new_website/MAHOUT/online-viterbi.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,154 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Online Viterbi</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Online Viterbi">Online Viterbi</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Online Viterbi</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27832702">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=27832702">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27832702">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=27832702">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27832702">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=27832702">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Online Viterbi algorithm implementation which could decode hidden variable sequence from the given sequence of observed variables as soon as some part of input sequence could be decoded. In some cases this algorithm may perform at the constant space and asymptotically same time as the normal Viterbi [1].</P>
+
+<P>Online Viterbi stores special compressed tree representation of backpointers where only potentially optimal paths of hidden states are present. When they all intersects in one point backward pass could be performed from this state. </P>
+
+<H3><A name="OnlineViterbi-Usage"></A>Usage</H3>
+
+<P>You can use Online Viterbi just as the normal Viterbi by running &quot;bin/mahout viterbi &#45;online&quot; for sequential computation and &quot;bin/mahout poviterbi&quot; for parallel.</P>
+
+<H3><A name="OnlineViterbi-References"></A>References</H3>
+
+<P>[1]&nbsp;Rastislav Sramek. The Online Viterbi algorithm (Master's Thesis). 2007</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/overview.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/overview.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/overview.html (added)
+++ mahout/site/new_website/MAHOUT/overview.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,157 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Overview</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="Overview">Overview</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Overview</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=22872406">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=22872406">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=22872406">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=22872406">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=22872406">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=22872406">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="Overview-OverviewofMahout"></A>Overview of Mahout</H1>
+
+<P>Mahout's goal is to build scalable machine learning libraries. With scalable we mean: </P>
+<UL>
+	<LI>Scalable to reasonably large data sets. Our core algorithms for clustering, classfication and batch based collaborative filtering are implemented on top of Apache Hadoop using the map/reduce paradigm. However we do not restrict contributions to Hadoop based implementations: Contributions that run on a single node or on a non-Hadoop cluster are welcome as well. The core libraries are highly optimized to allow for good performance also for non-distributed algorithms.</LI>
+	<LI>Scalable to support your business case. Mahout is distributed under a commercially friendly Apache Software license.</LI>
+	<LI>Scalable community. The goal of Mahout is to build a vibrant, responsive, diverse community to facilitate discussions not only on the project itself but also on potential use cases. Come to the mailing lists to find out more.</LI>
+</UL>
+
+
+
+<P>Currently Mahout supports mainly four use cases: Recommendation mining takes users' behavior and from that tries to find items users might like. Clustering takes e.g. text documents and groups them into groups of topically related documents. Classification learns from exisiting categorized documents what documents of a specific category look like and is able to assign unlabelled documents to the (hopefully) correct category. Frequent itemset mining takes a set of item groups (terms in a query session, shopping cart content) and identifies, which individual items usually appear together. </P>
+
+<P>Interested in helping? See the <A href="http://cwiki.apache.org/confluence/display/MAHOUT" class="external-link" rel="nofollow">Wiki</A> or send us an email. Also note, we are just getting off the ground, so please be patient as we get the various infrastructure pieces in place.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file



Mime
View raw message