mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From build...@apache.org
Subject svn commit: r944380 [12/24] - in /websites/staging/mahout/trunk/content: ./ developers/ general/ users/basics/ users/classification/ users/clustering/ users/dim-reduction/ users/mapreduce/ users/mapreduce/classification/ users/mapreduce/clustering/ use...
Date Thu, 19 Mar 2015 21:21:47 GMT
Added: websites/staging/mahout/trunk/content/users/mapreduce/classification/support-vector-machines.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/mapreduce/classification/support-vector-machines.html (added)
+++ websites/staging/mahout/trunk/content/users/mapreduce/classification/support-vector-machines.html Thu Mar 19 21:21:45 2015
@@ -0,0 +1,308 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="http://mahout.apache.org/images/favicon.ico">
+  <script type="text/javascript" src="/js/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({ // hand MathJax a configuration object for its tex2jax component
+    tex2jax: {
+      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] // per the MathJax v2 docs, tex2jax does not look for math inside these tags
+    }
+  });
+  MathJax.Hub.Queue(function() { // queue a callback on the MathJax hub
+    var all = MathJax.Hub.getAllJax(), i; // every element jax MathJax reports for the page
+    for(i = 0; i < all.length; i += 1) {
+      all[i].SourceElement().parentNode.className += ' has-jax'; // append the has-jax class to the parent of each jax's source element
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    var mathjax = document.createElement('script'); // build a script element that will load MathJax
+    mathjax.type = 'text/javascript'; 
+    mathjax.async = true; // request asynchronous loading of the injected script
+
+    mathjax.src = ('https:' == document.location.protocol) ? // choose the host by the page's own protocol
+        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' : 
+        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+	
+	  var s = document.getElementsByTagName('script')[0]; // first script element currently in the document
+    s.parentNode.insertBefore(mathjax, s); // insert the loader immediately before that first script
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/overview.html"></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="/">Home</a></li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a>
+                  <li><a href="/general/who-we-are.html">Who we are</a>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
+                  <li><a href="/general/release-notes.html">Release Notes</a> 
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
+                  <li><a href="/general/professional-support.html">Professional Support</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a>
+                  <li><a href="/general/faq.html">FAQ</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li><a href="/users/basics/quickstart.html">Quickstart</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
+                  <li><a href="/users/basics/collocations.html">Collocations</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+                 </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Spark<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+			      <li class="divider"></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+                </ul>
+               </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Classification<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/mapreduce/classification/logistic-regression.html">Logistic Regression</a></li>
+                  <li><a href="/users/mapreduce/classification/partial-implementation.html">Random Forest</a></li>
+
+                  <li class="divider"></li>
+                  <li class="nav-header">Examples</li>
+                  <li><a href="/users/mapreduce/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                </ul></li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Clustering<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                <li><a href="/users/mapreduce/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Commandline usage</li>
+                <li><a href="/users/mapreduce/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Examples</li>
+                <li><a href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Post processing</li>
+                <li><a href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                <li><a href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                </ul></li>
+                <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                <li><a href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		<li><a href="/users/mapreduce/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Hadoop</li>
+                <li><a href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                <li><a href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+                <li class="nav-header">Spark</li>
+                <li><a href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+              </ul>
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Hadoop</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <p><a name="SupportVectorMachines-SupportVectorMachines"></a></p>
+<h1 id="support-vector-machines">Support Vector Machines</h1>
+<p>As with Naive Bayes, Support Vector Machines (or SVMs for short) can be used
+to solve the task of assigning objects to classes. However, the way this
+task is solved is completely different to the setting in Naive Bayes.</p>
+<p>Each object is considered to be a point in <em>n</em>-dimensional feature space,
+<em>n</em> being the number of features used to describe the objects numerically.
+In addition, each object is assigned a binary label; let us assume the
+labels are "positive" and "negative". During learning, the algorithm tries
+to find a hyperplane in that space that perfectly separates positive from
+negative objects.
+It is trivial to think of settings where this might very well be
+impossible. To remedy this situation, objects can be assigned so-called
+slack terms that punish mistakes made during learning appropriately. That
+way, the algorithm is forced to find the hyperplane that causes the smallest
+number of mistakes.</p>
+<p>Another way to overcome the problem of there being no linear hyperplane to
+separate positive from negative objects is to simply project each feature
+vector into a higher-dimensional feature space and search for a linear
+separating hyperplane in that new space. Usually the main problem with
+learning in high-dimensional feature spaces is the so-called curse of
+dimensionality. That is, there are fewer learning examples available than
+free parameters to tune. In the case of SVMs this problem is less
+detrimental, as SVMs impose additional structural constraints on their
+solutions. Each separating hyperplane needs to have a maximal margin to all
+training examples. As a result, the solution may be based on the
+information encoded in only very few examples.</p>
+<p><a name="SupportVectorMachines-Strategyforparallelization"></a></p>
+<h2 id="strategy-for-parallelization">Strategy for parallelization</h2>
+<p><a name="SupportVectorMachines-Designofpackages"></a></p>
+<h2 id="design-of-packages">Design of packages</h2>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() { // immediately-invoked function: keeps cx, gcse and s out of the global scope
+      var cx = '012254517474945470291:vhsfv7eokdc'; // value sent as the cx query parameter of the cse.js URL below
+      var gcse = document.createElement('script'); // script element that will load Google's cse.js
+      gcse.type = 'text/javascript';
+      gcse.async = true; // request asynchronous loading of the injected script
+      gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') + // use https only when the page itself is served over https
+          '//www.google.com/cse/cse.js?cx=' + cx;
+      var s = document.getElementsByTagName('script')[0]; // first script element currently in the document
+      s.parentNode.insertBefore(gcse, s); // insert the loader immediately before that first script
+    })();
+  </script>
+</body>
+</html>

Added: websites/staging/mahout/trunk/content/users/mapreduce/classification/twenty-newsgroups.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/mapreduce/classification/twenty-newsgroups.html (added)
+++ websites/staging/mahout/trunk/content/users/mapreduce/classification/twenty-newsgroups.html Thu Mar 19 21:21:45 2015
@@ -0,0 +1,481 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="http://mahout.apache.org/images/favicon.ico">
+  <script type="text/javascript" src="/js/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- mathJax stuff -- use `\(...\)` for inline style math in markdown -->
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    tex2jax: {
+      skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
+    }
+  });
+  MathJax.Hub.Queue(function() {
+    var all = MathJax.Hub.getAllJax(), i;
+    for(i = 0; i < all.length; i += 1) {
+      all[i].SourceElement().parentNode.className += ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    var mathjax = document.createElement('script'); 
+    mathjax.type = 'text/javascript'; 
+    mathjax.async = true;
+
+    mathjax.src = ('https:' == document.location.protocol) ?
+        'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' : 
+        'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+	
+	  var s = document.getElementsByTagName('script')[0]; 
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/overview.html"></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="/">Home</a></li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a>
+                  <li><a href="/general/who-we-are.html">Who we are</a>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a>
+                  <li><a href="/general/release-notes.html">Release Notes</a> 
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a>
+                  <li><a href="/general/professional-support.html">Professional Support</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a>
+                  <li><a href="/general/faq.html">FAQ</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a>
+                  <li><a href="/users/basics/quickstart.html">Quickstart</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a>
+                  <li><a href="/users/basics/collocations.html">Collocations</a>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+                 </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Spark<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+			      <li class="divider"></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+                </ul>
+               </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Classification<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/mapreduce/classification/logistic-regression.html">Logistic Regression</a></li>
+                  <li><a href="/users/mapreduce/classification/partial-implementation.html">Random Forest</a></li>
+
+                  <li class="divider"></li>
+                  <li class="nav-header">Examples</li>
+                  <li><a href="/users/mapreduce/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                </ul></li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Clustering<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                <li><a href="/users/mapreduce/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Commandline usage</li>
+                <li><a href="/users/mapreduce/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Examples</li>
+                <li><a href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Post processing</li>
+                <li><a href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                <li><a href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                </ul></li>
+                <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                <li><a href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		<li><a href="/users/mapreduce/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Hadoop</li>
+                <li><a href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                <li><a href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+                <li class="nav-header">Spark</li>
+                <li><a href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+              </ul>
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Hadoop</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <p><a name="TwentyNewsgroups-TwentyNewsgroupsClassificationExample"></a></p>
+<h2 id="twenty-newsgroups-classification-example">Twenty Newsgroups Classification Example</h2>
+<p><a name="TwentyNewsgroups-Introduction"></a></p>
+<h2 id="introduction">Introduction</h2>
+<p>The 20 newsgroups dataset is a collection of approximately 20,000
+newsgroup documents, partitioned (nearly) evenly across 20 different
+newsgroups. The 20 newsgroups collection has become a popular data set for
+experiments in text applications of machine learning techniques, such as
+text classification and text clustering. We will use the <a href="http://mahout.apache.org/users/mapreduce/classification/bayesian.html">Mahout CBayes</a>
+classifier to create a model that would classify a new document into one of
+the 20 newsgroups.</p>
+<p><a name="TwentyNewsgroups-Prerequisites"></a></p>
+<h3 id="prerequisites">Prerequisites</h3>
+<ul>
+<li>Mahout has been downloaded (<a href="https://mahout.apache.org/general/downloads.html">instructions here</a>)</li>
+<li>Maven is available</li>
+<li>Your environment has the following variables:<ul>
+<li><strong>HADOOP_HOME</strong> Environment variable that refers to where Hadoop lives</li>
+<li><strong>MAHOUT_HOME</strong> Environment variable that refers to where Mahout lives</li>
+</ul>
+</li>
+</ul>
+<p><a name="TwentyNewsgroups-Instructionsforrunningtheexample"></a></p>
+<h3 id="instructions-for-running-the-example">Instructions for running the example</h3>
+<ol>
+<li>
+<p>If running Hadoop in cluster mode, start the hadoop daemons by executing the following commands:</p>
+<div class="codehilite"><pre>    $ <span class="n">cd</span> $<span class="n">HADOOP_HOME</span><span class="o">/</span><span class="n">bin</span>
+    $ <span class="o">./</span><span class="n">start</span><span class="o">-</span><span class="n">all</span><span class="p">.</span><span class="n">sh</span>
+</pre></div>
+
+
+<p>Otherwise:</p>
+<div class="codehilite"><pre>    $ <span class="n">export</span> <span class="n">MAHOUT_LOCAL</span><span class="p">=</span><span class="n">true</span>
+</pre></div>
+
+
+</li>
+<li>
+<p>In the trunk directory of Mahout, compile and install Mahout:</p>
+<div class="codehilite"><pre>    $ <span class="n">cd</span> $<span class="n">MAHOUT_HOME</span>
+    $ <span class="n">mvn</span> <span class="o">-</span><span class="n">DskipTests</span> <span class="n">clean</span> <span class="n">install</span>
+</pre></div>
+
+
+</li>
+<li>
+<p>Run the <a href="https://github.com/apache/mahout/blob/master/examples/bin/classify-20newsgroups.sh">20 newsgroups example script</a> by executing:</p>
+<div class="codehilite"><pre>    $ <span class="o">./</span><span class="n">examples</span><span class="o">/</span><span class="n">bin</span><span class="o">/</span><span class="n">classify</span><span class="o">-</span>20<span class="n">newsgroups</span><span class="p">.</span><span class="n">sh</span>
+</pre></div>
+
+
+</li>
+<li>
+<p>You will be prompted to select a classification algorithm:</p>
+<div class="codehilite"><pre>    1<span class="p">.</span> <span class="n">Complement</span> <span class="n">Naive</span> <span class="n">Bayes</span>
+    2<span class="p">.</span> <span class="n">Naive</span> <span class="n">Bayes</span>
+    3<span class="p">.</span> <span class="n">Stochastic</span> <span class="n">Gradient</span> <span class="n">Descent</span>
+</pre></div>
+
+
+</li>
+</ol>
+<p>Select 1 and the script will perform the following:</p>
+<ol>
+<li>Create a working directory for the dataset and all input/output.</li>
+<li>Download and extract the <em>20news-bydate.tar.gz</em> from the <a href="http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz">20 newsgroups dataset</a> to the working directory.</li>
+<li>Convert the full 20 newsgroups dataset into a &lt; Text, Text &gt; SequenceFile. </li>
+<li>Convert and preprocess the dataset into a &lt; Text, VectorWritable &gt; SequenceFile containing term frequencies for each document.</li>
+<li>Split the preprocessed dataset into training and testing sets. </li>
+<li>Train the classifier.</li>
+<li>Test the classifier.</li>
+</ol>
+<p>Output should look something like:</p>
+<div class="codehilite"><pre><span class="o">=======================================================</span>
+Confusion Matrix
+<span class="o">-------------------------------------------------------</span>
+ a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t <span class="o">&lt;--</span>Classified as
+<span class="m">381</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">9</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">0</span> <span class="o">|</span><span class="m">398</span> a<span class="o">=</span>rec.motorcycles
+ <span class="m">1</span> <span class="m">284</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">6</span>  <span class="m">3</span>  <span class="m">11</span> <span class="m">0</span>  <span class="m">66</span> <span class="m">3</span>  <span class="m">0</span>  <span class="m">6</span>  <span class="m">0</span>  <span class="m">4</span>  <span class="m">9</span>  <span class="m">0</span> <span class="o">|</span><span class="m">395</span> b<span class="o">=</span>comp.windows.x
+ <span class="m">2</span>  <span class="m">0</span> <span class="m">339</span> <span class="m">2</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">5</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">1</span>  <span class="m">12</span> <span class="m">1</span>  <span class="m">7</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span> <span class="o">|</span><span class="m">376</span> c<span class="o">=</span>talk.politics.mideast
+ <span class="m">4</span>  <span class="m">0</span>  <span class="m">1</span> <span class="m">327</span> <span class="m">0</span>  <span class="m">2</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">1</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">5</span>  <span class="m">1</span>  <span class="m">4</span>  <span class="m">12</span> <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span> <span class="o">|</span><span class="m">364</span> d<span class="o">=</span>talk.politics.guns
+ <span class="m">7</span>  <span class="m">0</span>  <span class="m">4</span>  <span class="m">32</span> <span class="m">27</span> <span class="m">7</span>  <span class="m">7</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">12</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">6</span>  <span class="m">0</span> <span class="m">100</span> <span class="m">9</span>  <span class="m">7</span>  <span class="m">31</span> <span class="m">0</span>  <span class="m">0</span> <span class="o">|</span><span class="m">251</span> e<span class="o">=</span>talk.religion.misc
+ <span class="m">10</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span> <span class="m">359</span> <span class="m">2</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">6</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">11</span> <span class="m">0</span> <span class="o">|</span><span class="m">396</span> f<span class="o">=</span>rec.autos
+ <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span> <span class="m">383</span> <span class="m">9</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">0</span>  <span class="m">0</span> <span class="o">|</span><span class="m">397</span> g<span class="o">=</span>rec.sport.baseball
+ <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">9</span> <span class="m">382</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">1</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span> <span class="o">|</span><span class="m">399</span> h<span class="o">=</span>rec.sport.hockey
+ <span class="m">2</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">4</span>  <span class="m">3</span>  <span class="m">0</span> <span class="m">330</span> <span class="m">4</span>  <span class="m">4</span>  <span class="m">0</span>  <span class="m">5</span>  <span class="m">12</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">12</span> <span class="m">7</span> <span class="o">|</span><span class="m">385</span> i<span class="o">=</span>comp.sys.mac.hardware
+ <span class="m">0</span>  <span class="m">3</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span> <span class="m">368</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">10</span> <span class="m">4</span>  <span class="m">1</span>  <span class="m">3</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span> <span class="o">|</span><span class="m">394</span> j<span class="o">=</span>sci.space
+ <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">27</span> <span class="m">2</span> <span class="m">291</span> <span class="m">0</span>  <span class="m">11</span> <span class="m">25</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">13</span> <span class="m">18</span><span class="o">|</span><span class="m">392</span> k<span class="o">=</span>comp.sys.ibm.pc.hardware
+ <span class="m">8</span>  <span class="m">0</span>  <span class="m">1</span> <span class="m">109</span> <span class="m">0</span>  <span class="m">6</span>  <span class="m">11</span> <span class="m">4</span>  <span class="m">1</span>  <span class="m">18</span> <span class="m">0</span>  <span class="m">98</span> <span class="m">1</span>  <span class="m">3</span>  <span class="m">11</span> <span class="m">10</span> <span class="m">27</span> <span class="m">1</span>  <span class="m">1</span>  <span class="m">0</span> <span class="o">|</span><span class="m">310</span> l<span class="o">=</span>talk.politics.misc
+ <span class="m">0</span>  <span class="m">11</span> <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">6</span>  <span class="m">0</span>  <span class="m">10</span> <span class="m">6</span>  <span class="m">11</span> <span class="m">0</span> <span class="m">299</span> <span class="m">13</span> <span class="m">0</span>  <span class="m">2</span>  <span class="m">13</span> <span class="m">0</span>  <span class="m">7</span>  <span class="m">8</span> <span class="o">|</span><span class="m">389</span> m<span class="o">=</span>comp.graphics
+ <span class="m">6</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">4</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">5</span>  <span class="m">2</span>  <span class="m">12</span> <span class="m">0</span>  <span class="m">8</span> <span class="m">321</span> <span class="m">0</span>  <span class="m">4</span>  <span class="m">14</span> <span class="m">0</span>  <span class="m">8</span>  <span class="m">6</span> <span class="o">|</span><span class="m">393</span> n<span class="o">=</span>sci.electronics
+ <span class="m">2</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">4</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">1</span> <span class="m">372</span> <span class="m">6</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">1</span>  <span class="m">2</span> <span class="o">|</span><span class="m">398</span> o<span class="o">=</span>soc.religion.christian
+ <span class="m">4</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">3</span>  <span class="m">3</span>  <span class="m">0</span>  <span class="m">4</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">7</span>  <span class="m">12</span> <span class="m">6</span> <span class="m">342</span> <span class="m">1</span>  <span class="m">0</span>  <span class="m">9</span>  <span class="m">0</span> <span class="o">|</span><span class="m">396</span> p<span class="o">=</span>sci.med
+ <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">4</span>  <span class="m">0</span>  <span class="m">3</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">8</span>  <span class="m">4</span>  <span class="m">0</span>  <span class="m">2</span> <span class="m">369</span> <span class="m">0</span>  <span class="m">1</span>  <span class="m">1</span> <span class="o">|</span><span class="m">396</span> q<span class="o">=</span>sci.crypt
+ <span class="m">10</span> <span class="m">0</span>  <span class="m">4</span>  <span class="m">10</span> <span class="m">1</span>  <span class="m">5</span>  <span class="m">6</span>  <span class="m">2</span>  <span class="m">2</span>  <span class="m">6</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">1</span> <span class="m">86</span> <span class="m">15</span> <span class="m">14</span> <span class="m">152</span> <span class="m">0</span>  <span class="m">1</span> <span class="o">|</span><span class="m">319</span> r<span class="o">=</span>alt.atheism
+ <span class="m">4</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">9</span>  <span class="m">1</span>  <span class="m">1</span>  <span class="m">8</span>  <span class="m">1</span>  <span class="m">12</span> <span class="m">0</span>  <span class="m">3</span>  <span class="m">0</span>  <span class="m">2</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span> <span class="m">341</span> <span class="m">2</span> <span class="o">|</span><span class="m">390</span> s<span class="o">=</span>misc.forsale
+ <span class="m">8</span>  <span class="m">5</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">0</span>  <span class="m">1</span>  <span class="m">6</span>  <span class="m">0</span>  <span class="m">8</span>  <span class="m">5</span>  <span class="m">50</span> <span class="m">0</span>  <span class="m">40</span> <span class="m">2</span>  <span class="m">1</span>  <span class="m">0</span>  <span class="m">9</span>  <span class="m">0</span>  <span class="m">3</span> <span class="m">256</span><span class="o">|</span><span class="m">394</span> t<span class="o">=</span>comp.os.ms<span class="o">-</span>windows.misc
+<span class="o">=======================================================</span>
+Statistics
+<span class="o">-------------------------------------------------------</span>
+Kappa                                       <span class="m">0.8808</span>
+Accuracy                                   <span class="m">90.8596</span><span class="o">%</span>
+Reliability                                <span class="m">86.3632</span><span class="o">%</span>
+Reliability <span class="p">(</span>standard deviation<span class="p">)</span>            <span class="m">0.2131</span>
+</pre></div>
+
+
+<p><a name="TwentyNewsgroups-ComplementaryNaiveBayes"></a></p>
+<h2 id="end-to-end-commands-to-build-a-cbayes-model-for-20-newsgroups">End to end commands to build a CBayes model for 20 newsgroups</h2>
+<p>The <a href="https://github.com/apache/mahout/blob/master/examples/bin/classify-20newsgroups.sh">20 newsgroups example script</a> issues the following commands as outlined above. We can build a CBayes classifier from the command line by following the process in the script: </p>
+<p><em>Be sure that <strong>MAHOUT_HOME</strong>/bin and <strong>HADOOP_HOME</strong>/bin are in your <strong>$PATH</strong></em></p>
+<ol>
+<li>
+<p>Create a working directory for the dataset and all input/output.</p>
+<div class="codehilite"><pre>    $ export WORK_DIR=/tmp/mahout-work-<span class="cp">${</span><span class="n">USER</span><span class="cp">}</span>
+    $ mkdir -p <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>
+</pre></div>
+
+
+</li>
+<li>
+<p>Download and extract the <em>20news-bydate.tar.gz</em> from the <a href="http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz">20newsgroups dataset</a> to the working directory.</p>
+<div class="codehilite"><pre>    $ curl http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz \
+        -o <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-bydate.tar.gz
+    $ mkdir -p <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-bydate
+    $ cd <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-bydate <span class="err">&amp;&amp;</span> tar xzf ../20news-bydate.tar.gz <span class="err">&amp;&amp;</span> cd .. <span class="err">&amp;&amp;</span> cd ..
+    $ mkdir <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-all
+    $ cp -R <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-bydate/*/* <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-all
+</pre></div>
+
+
+<ul>
+<li>
+<p>If you're running on a Hadoop cluster:</p>
+<div class="codehilite"><pre>$ hadoop dfs -put <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-all <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-all
+</pre></div>
+
+
+</li>
+</ul>
+</li>
+<li>
+<p>Convert the full 20 newsgroups dataset into a &lt; Text, Text &gt; SequenceFile. </p>
+<div class="codehilite"><pre>    $ mahout seqdirectory \
+        -i <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-all \
+        -o <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-seq \
+        -ow
+</pre></div>
+
+
+</li>
+<li>
+<p>Convert and preprocess the dataset into a &lt; Text, VectorWritable &gt; SequenceFile containing term frequencies for each document.</p>
+<div class="codehilite"><pre>    $ mahout seq2sparse \
+        -i <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-seq \
+        -o <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-vectors \
+        -lnorm \
+        -nv \
+        -wt tfidf
+</pre></div>
+
+
+<p>If we wanted to use different parsing methods or transformations on the term frequency vectors we could supply different options here e.g.: -ng 2 for bigrams or -n 2 for L2 length normalization.  See the <a href="http://mahout.apache.org/users/basics/creating-vectors-from-text.html">Creating vectors from text</a> page for a list of all seq2sparse options.   </p>
+</li>
+<li>
+<p>Split the preprocessed dataset into training and testing sets.</p>
+<div class="codehilite"><pre>    $ mahout split \
+        -i <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-vectors/tfidf-vectors \
+        --trainingOutput <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-train-vectors \
+        --testOutput <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-test-vectors \
+        --randomSelectionPct 40 \
+        --overwrite --sequenceFiles -xm sequential
+</pre></div>
+
+
+</li>
+<li>
+<p>Train the classifier.</p>
+<div class="codehilite"><pre>    $ mahout trainnb \
+        -i <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-train-vectors \
+        -el \
+        -o <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/model \
+        -li <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/labelindex \
+        -ow \
+        -c
+</pre></div>
+
+
+</li>
+<li>
+<p>Test the classifier.</p>
+<div class="codehilite"><pre>    $ mahout testnb \
+        -i <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-test-vectors \
+        -m <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/model \
+        -l <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/labelindex \
+        -ow \
+        -o <span class="cp">${</span><span class="n">WORK_DIR</span><span class="cp">}</span>/20news-testing \
+        -c
+</pre></div>
+
+
+</li>
+</ol>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() {
+      var cx = '012254517474945470291:vhsfv7eokdc';
+      var gcse = document.createElement('script');
+      gcse.type = 'text/javascript';
+      gcse.async = true;
+      gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
+          '//www.google.com/cse/cse.js?cx=' + cx;
+      var s = document.getElementsByTagName('script')[0];
+      s.parentNode.insertBefore(gcse, s);
+    })();
+  </script>
+</body>
+</html>

Added: websites/staging/mahout/trunk/content/users/mapreduce/clustering/20newsgroups.html
==============================================================================
--- websites/staging/mahout/trunk/content/users/mapreduce/clustering/20newsgroups.html (added)
+++ websites/staging/mahout/trunk/content/users/mapreduce/clustering/20newsgroups.html Thu Mar 19 21:21:45 2015
@@ -0,0 +1,280 @@
+<!DOCTYPE html>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>Apache Mahout: Scalable machine learning and data mining</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="Distribution" content="Global">
+  <meta name="Robots" content="index,follow">
+  <meta name="keywords" content="apache, apache hadoop, apache lucene,
+        business data mining, cluster analysis,
+        collaborative filtering, data extraction, data filtering, data framework, data integration,
+        data matching, data mining, data mining algorithms, data mining analysis, data mining data,
+        data mining introduction, data mining software,
+        data mining techniques, data representation, data set, datamining,
+        feature extraction, fuzzy k means, genetic algorithm, hadoop,
+        hierarchical clustering, high dimensional, introduction to data mining, kmeans,
+        knowledge discovery, learning approach, learning approaches, learning methods,
+        learning techniques, lucene, machine learning, machine translation, mahout apache,
+        mahout taste, map reduce hadoop, mining data, mining methods, naive bayes,
+        natural language processing,
+        supervised, text mining, time series data, unsupervised, web data mining">
+  <link rel="shortcut icon" type="image/x-icon" href="http://mahout.apache.org/images/favicon.ico">
+  <script type="text/javascript" src="/js/prototype.js"></script>
+  <script type="text/javascript" src="/js/effects.js"></script>
+  <script type="text/javascript" src="/js/search.js"></script>
+  <script type="text/javascript" src="/js/slides.js"></script>
+
+  <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
+  <link href="/css/bootstrap-responsive.css" rel="stylesheet">
+  <link rel="stylesheet" href="/css/global.css" type="text/css">
+
+  <!-- MathJax setup: use `\(...\)` for inline-style math in Markdown -->
+  <script type="text/x-mathjax-config">
+  MathJax.Hub.Config({ // set the tex2jax skipTags list
+    tex2jax: { skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] }
+  });
+  // Queued callback: append the "has-jax" class to the parent of every jax source element.
+  MathJax.Hub.Queue(function() {
+    var jaxList = MathJax.Hub.getAllJax();
+    for (var idx = 0; idx < jaxList.length; idx++) {
+      var holder = jaxList[idx].SourceElement().parentNode;
+      holder.className = holder.className + ' has-jax';
+    }
+  });
+  </script>
+  <script type="text/javascript">
+    // Inject the MathJax script asynchronously, ahead of the first script element on the page.
+    var mathjax = document.createElement('script');
+    mathjax.type = 'text/javascript';
+    mathjax.async = true;
+    // cdn.mathjax.org and its rackcdn mirror were retired in 2017; cdnjs hosts MathJax 2.x,
+    // which still reads the text/x-mathjax-config block. Always use HTTPS.
+    mathjax.src =
+        'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
+    var s = document.getElementsByTagName('script')[0];
+    s.parentNode.insertBefore(mathjax, s);
+  </script>
+</head>
+
+<body id="home" data-twttr-rendered="true">
+  <div id="wrap">
+   <div id="header">
+    <div id="logo"><a href="/overview.html"></a></div>
+  <div id="search">
+    <form id="search-form" action="http://www.google.com/search" method="get" class="navbar-search pull-right">    
+      <input value="http://mahout.apache.org" name="sitesearch" type="hidden">
+      <input class="search-query" name="q" id="query" type="text">
+      <input id="submission" type="image" src="/images/mahout-lupe.png" alt="Search" />
+    </form>
+  </div>
+
+    <div class="navbar navbar-inverse" style="position:absolute;top:133px;padding-right:0px;padding-left:0px;">
+      <div class="navbar-inner" style="border: none; background: #999; border: none; border-radius: 0px;">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <!-- <a class="brand" href="#">Apache Community Development Project</a> -->
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="/">Home</a></li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">General<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/general/downloads.html">Downloads</a></li>
+                  <li><a href="/general/who-we-are.html">Who we are</a></li>
+                  <li><a href="/general/mailing-lists,-irc-and-archives.html">Mailing Lists</a></li>
+                  <li><a href="/general/release-notes.html">Release Notes</a></li>
+                  <li><a href="/general/books-tutorials-and-talks.html">Books, Tutorials, Talks</a></li>
+                  <li><a href="/general/powered-by-mahout.html">Powered By Mahout</a></li>
+                  <li><a href="/general/professional-support.html">Professional Support</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Resources</li>
+                  <li><a href="/general/reference-reading.html">Reference Reading</a></li>
+                  <li><a href="/general/faq.html">FAQ</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Legal</li>
+                  <li><a href="http://www.apache.org/licenses/">License</a></li>
+                  <li><a href="http://www.apache.org/security/">Security</a></li>
+                  <li><a href="/general/privacy-policy.html">Privacy Policy</a></li>
+                </ul>
+              </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Developers<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/developers/developer-resources.html">Developer resources</a></li>
+                  <li><a href="/developers/version-control.html">Version control</a></li>
+                  <li><a href="/developers/buildingmahout.html">Build from source</a></li>
+                  <li><a href="/developers/issue-tracker.html">Issue tracker</a></li>
+                  <li><a href="https://builds.apache.org/job/Mahout-Quality/" target="_blank">Code quality reports</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Contributions</li>
+                  <li><a href="/developers/how-to-contribute.html">How to contribute</a></li>
+                  <li><a href="/developers/how-to-become-a-committer.html">How to become a committer</a></li>
+                  <li><a href="/developers/gsoc.html">GSoC</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">For committers</li>
+                  <li><a href="/developers/how-to-update-the-website.html">How to update the website</a></li>
+                  <li><a href="/developers/patch-check-list.html">Patch check list</a></li>
+                  <li><a href="/developers/github.html">Handling Github PRs</a></li>
+                  <li><a href="/developers/how-to-release.html">How to release</a></li>
+                  <li><a href="/developers/thirdparty-dependencies.html">Third party dependencies</a></li>
+                </ul>
+               </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Basics<b class="caret"></b></a>
+                 <ul class="dropdown-menu">
+                  <li><a href="/users/basics/algorithms.html">List of algorithms</a></li>
+                  <li><a href="/users/basics/quickstart.html">Quickstart</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Working with text</li>
+                  <li><a href="/users/basics/creating-vectors-from-text.html">Creating vectors from text</a></li>
+                  <li><a href="/users/basics/collocations.html">Collocations</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Dimensionality reduction</li>
+                  <li><a href="/users/dim-reduction/dimensional-reduction.html">Singular Value Decomposition</a></li>
+                  <li><a href="/users/dim-reduction/ssvd.html">Stochastic SVD</a></li>
+                  <li class="divider"></li>
+                  <li class="nav-header">Topic Models</li>      
+                  <li><a href="/users/clustering/latent-dirichlet-allocation.html">Latent Dirichlet Allocation</a></li>
+                </ul>
+                 </li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Spark<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/sparkbindings/home.html">Scala &amp; Spark Bindings Overview</a></li>
+                  <li><a href="/users/sparkbindings/play-with-shell.html">Playing with Mahout's Spark Shell</a></li>
+			      <li class="divider"></li>
+                  <li><a href="/users/sparkbindings/faq.html">FAQ</a></li>
+                </ul>
+               </li>
+              <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Classification<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                  <li><a href="/users/mapreduce/classification/bayesian.html">Naive Bayes</a></li>
+                  <li><a href="/users/mapreduce/classification/hidden-markov-models.html">Hidden Markov Models</a></li>
+                  <li><a href="/users/mapreduce/classification/logistic-regression.html">Logistic Regression</a></li>
+                  <li><a href="/users/mapreduce/classification/partial-implementation.html">Random Forest</a></li>
+
+                  <li class="divider"></li>
+                  <li class="nav-header">Examples</li>
+                  <li><a href="/users/mapreduce/classification/breiman-example.html">Breiman example</a></li>
+                  <li><a href="/users/mapreduce/classification/twenty-newsgroups.html">20 newsgroups example</a></li>
+                </ul></li>
+               <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Clustering<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/clustering/k-means-clustering.html">k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-clustering.html">Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means.html">Fuzzy k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/streaming-k-means.html">Streaming KMeans</a></li>
+                <li><a href="/users/mapreduce/clustering/spectral-clustering.html">Spectral Clustering</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Commandline usage</li>
+                <li><a href="/users/mapreduce/clustering/k-means-commandline.html">Options for k-Means</a></li>
+                <li><a href="/users/mapreduce/clustering/canopy-commandline.html">Options for Canopy</a></li>
+                <li><a href="/users/mapreduce/clustering/fuzzy-k-means-commandline.html">Options for Fuzzy k-Means</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Examples</li>
+                <li><a href="/users/mapreduce/clustering/clustering-of-synthetic-control-data.html">Synthetic data</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Post processing</li>
+                <li><a href="/users/mapreduce/clustering/cluster-dumper.html">Cluster Dumper tool</a></li>
+                <li><a href="/users/mapreduce/clustering/visualizing-sample-clusters.html">Cluster visualisation</a></li>
+                </ul></li>
+                <li class="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">Recommendations<b class="caret"></b></a>
+                <ul class="dropdown-menu">
+                <li><a href="/users/mapreduce/recommender/quickstart.html">Quickstart</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-first-timer-faq.html">First Timer FAQ</a></li>
+                <li><a href="/users/mapreduce/recommender/userbased-5-minutes.html">A user-based recommender <br/>in 5 minutes</a></li>
+		<li><a href="/users/mapreduce/recommender/matrix-factorization.html">Matrix factorization-based<br/> recommenders</a></li>
+                <li><a href="/users/mapreduce/recommender/recommender-documentation.html">Overview</a></li>
+                <li class="divider"></li>
+                <li class="nav-header">Hadoop</li>
+                <li><a href="/users/mapreduce/recommender/intro-itembased-hadoop.html">Intro to item-based recommendations<br/> with Hadoop</a></li>
+                <li><a href="/users/mapreduce/recommender/intro-als-hadoop.html">Intro to ALS recommendations<br/> with Hadoop</a></li>
+                <li class="nav-header">Spark</li>
+                <li><a href="/users/mapreduce/recommender/intro-cooccurrence-spark.html">Intro to cooccurrence-based<br/> recommendations with Spark</a></li>
+              </ul>
+            </li>
+           </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+</div>
+
+ <div id="sidebar">
+  <div id="sidebar-wrap">
+    <h2>Twitter</h2>
+	<ul class="sidemenu">
+		<li>
+<a class="twitter-timeline" href="https://twitter.com/ApacheMahout" data-widget-id="422861673444028416">Tweets by @ApacheMahout</a>
+<script>/* Twitter widget loader: unless an element with id "twitter-wjs" already exists, creates a script element with that id pointing at platform.twitter.com/widgets.js (scheme "http" when the page location starts with "http:", otherwise "https") and inserts it before the first script element in the document. */!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+"://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script>
+</li>
+	</ul>
+    <h2>Apache Software Foundation</h2>
+    <ul class="sidemenu">
+      <li><a href="http://www.apache.org/foundation/how-it-works.html">How the ASF works</a></li>
+      <li><a href="http://www.apache.org/foundation/getinvolved.html">Get Involved</a></li>
+      <li><a href="http://www.apache.org/dev/">Developer Resources</a></li>
+      <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+      <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+    </ul>
+    <h2>Related Projects</h2>
+    <ul class="sidemenu">
+      <li><a href="http://lucene.apache.org/">Lucene</a></li>
+      <li><a href="http://hadoop.apache.org/">Hadoop</a></li>
+    </ul>
+  </div>
+</div>
+
+  <div id="content-wrap" class="clearfix">
+   <div id="main">
+    <p><a name="20Newsgroups-NaiveBayesusing20NewsgroupsData"></a></p>
+<h1 id="naive-bayes-using-20-newsgroups-data">Naive Bayes using 20 Newsgroups Data</h1>
+<p>See <a href="https://issues.apache.org/jira/browse/MAHOUT-9">https://issues.apache.org/jira/browse/MAHOUT-9</a></p>
+   </div>
+  </div>     
+</div> 
+  <footer class="footer" align="center">
+    <div class="container">
+      <p>
+        Copyright &copy; 2014 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div>
+  </footer>
+  
+  <script src="/js/jquery-1.9.1.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script>
+    (function() { // Load the Google Custom Search script without blocking the page.
+      var cx = '012254517474945470291:vhsfv7eokdc'; // sent as the cx query parameter
+      var gcse = document.createElement('script');
+      gcse.type = 'text/javascript';
+      gcse.async = true;
+      // Always request over HTTPS, even from http: pages, so the script cannot be altered in transit.
+      gcse.src = 'https://www.google.com/cse/cse.js?cx=' + cx;
+      var s = document.getElementsByTagName('script')[0];
+      s.parentNode.insertBefore(gcse, s); // place it ahead of the first script on the page
+    })();
+  </script>
+</body>
+</html>



Mime
View raw message