hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From acmur...@apache.org
Subject svn commit: r611895 [2/4] - in /lucene/hadoop/branches/branch-0.15: ./ docs/ docs/skin/ docs/skin/images/ src/docs/src/documentation/content/xdocs/
Date Mon, 14 Jan 2008 19:16:46 GMT
Modified: lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf?rev=611895&r1=611894&r2=611895&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf (original)
+++ lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf Mon Jan 14 11:16:43 2008
@@ -5,10 +5,10 @@
 /Producer (FOP 0.20.5) >>
 endobj
 5 0 obj
-<< /Length 357 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 807 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-GauI0_,>n>&;KY!METC?CGrC"Nj`F.NYeV(W<]6G1/^A&*T(0o)+Zp_BQ?ZCAG=hhs0WHJ>9?gp&kjq=\>2I.5nM+G'#`g5(eF;:#g<UH]j9#u/%AG;)K*4.U3]$MbmIdn4:EnQ1r1dMX;T+jcNKHEPnq3rPum.l5-:kLX86_r0p''/O]3Qe#m5V<OW1G%7,)fBqF^G*>hr$Mh_DcrLsR56#bIK6F`53R[DXtdUdiT?fEY&K$uH3ZGS9>i8i:fc@\e+]Ng%D?EQA^gaAXTIFn=&tLL\*oq4a5lV52V!1.TC[]-^*T]rUc(4)(7LD>dW)PJ3tGFOJ<P&[llGkcDt]YAB9Cf7*hB0p>c~>
+GatUr9i'Ou&;KZL'gBJ8f:Vju0$k?8W*7hE4d;se'/+-`NL.Qap"q]g!BZqc.I9[=*a>',)/$s\dldK6VLU@V5e`5>r@9&pDBfQds2?+eF"p%D`[T"'i7H@G)0lWc8<WZs8!g6igP^]VaMm$d=P$bA51?p%[:t]<DifJ9PrZXu.?EP7P/%nh-^X:$CL(nkEYOOf+$L8r_b%GEfh$)rH@Mlh$"d?U>,!VO,nihWVWeH:2%mX-isBn#_(:"D^EJ9F'.cUr>Bl)LH]IedPZlgD3#[JM]`#K&N8HT?$&2SP`\(PUdh!e5&.CpX7bB(*]AV"XdI$i*-B-CG;DcOp6OST??+cL4349KjoY2&Qh]HD#T]G-B.S_/+nQ8/n31V9]"JuiC=.Li/=sFrlS;k-$$WZTs[ki+WV7qgnd@F9CCLlH=Hm;@p3:1COG'<ULMQQJ0#8o=iWNREtcIX"2\8$n,T?0G8/dBQU(pe=c\gt8]8]Va@XH.;ZE,mPRXpC&N]JI,rU0:13O<nIJh>WXSA3%tN.scei@i?fiWkSF&<`2Q0,-CT#a+iTqP$.bYi]$@S3#\)<P+cnc@cF%ml"HMZQHW-IV\TA3o\03&i,4Q&:+*)P]t4%_:?['UmcS:dp.c'qb%9,Pqn#h0=oJJd^>J.\J#$1,O2F>WoB:X:;hd@MUBDn`YX'-m_EfE_2S+c@D0nYHp#l#q,miTtGD#9#)fq*<a:fqBV@W\FFh9HDDn.#2H]'#2"RXJeBQdD];2"X4Dp>%jJsj%7l>jC*@IRB!H"53="'`54FX0WD6iR[Sm7:F~>
 endstream
 endobj
 6 0 obj
@@ -17,143 +17,78 @@
 /MediaBox [ 0 0 612 792 ]
 /Resources 3 0 R
 /Contents 5 0 R
-/Annots 7 0 R
 >>
 endobj
 7 0 obj
-[
-8 0 R
-]
-endobj
-8 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 559.666 195.992 547.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 9 0 R
-/H /I
->>
-endobj
-10 0 obj
-<< /Length 822 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gatn%?#Q2d'Sc)P'u"K;iqhDKR3p+gC"&LSBc#M\\R6%L&lLEcp?uc>$DoCd&kd+_/H!4XbW.lM3qTRU&mtnU#SR`"XbQq_?OR!Hf*%i[;ZX`upj%GL-(#9NY?utJ0%4*n-f+5[IPCV+b"M4CKV,pgU%_q`CZ1=\.nU]OK`J3"IRY`;$ksW8H#)l[#/UA((8qa23(1Djq*bQR)-j<=X625.:E,[;2r(>kc]=Ai[Zl8r5%dmc8sNigK7NT9bLGbM5_u@)OFTa1j>2a!Nb'83H[B#?V"nFCSkiNPa">T3ni]*405t8boJYR87pS6SqIq-V5Tn%?B9]$Ra6820rnDP\.ZNjhUC&fBO#8<J>!4k!@t`']ibcP$<g?,j?$0tjW@"JcZY>4,le.T+lkh,WcADE"oObYfFHlA@g=aE)9NC;5Ygbup^Lt6<PCH+qq_&1hk,UPg^?`:dS/WlX9O=MERn-a*.:YO4]fHoi*p$r6*\*nD.C@g,L[F^bV3LVBb"d8A:ZA''RbrSZ%W^7X(t1Ha;baEi3i:qjS.(\1O7YW,#UP'=(/KqP(_``5'sj@4rsP"MOJmqS,291(lQXU-/3CPg055,I^.a7_88Uf*[TP,-g-^C/qmX2V^44-OOOt$R,\ZpgRQ`_V;Sk[+;R7D3D/$OZ69TcSSK9!^,sMhE[M!jK*[KQdnA:8k,O/38lbf&:^K\,5q[j(C^-J05#PphDmS#d2Eu5/mHtLcG/%*HZIuAE\1Q>^(=P/8h\;LVZZAZSjmXk737;fc['D&",B4[?0LUcUJd+O^r?-mh_:97+C3ctkCZ[2mZW`)X~>
-endstream
-endobj
-11 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 10 0 R
->>
-endobj
-13 0 obj
-<<
- /Title (\376\377\0\61\0\40\0\124\0\141\0\142\0\154\0\145\0\40\0\157\0\146\0\40\0\103\0\157\0\156\0\164\0\145\0\156\0\164\0\163)
- /Parent 12 0 R
- /A 9 0 R
->> endobj
-14 0 obj
 << /Type /Font
 /Subtype /Type1
 /Name /F3
 /BaseFont /Helvetica-Bold
 /Encoding /WinAnsiEncoding >>
 endobj
-15 0 obj
+8 0 obj
 << /Type /Font
 /Subtype /Type1
 /Name /F5
 /BaseFont /Times-Roman
 /Encoding /WinAnsiEncoding >>
 endobj
-16 0 obj
+9 0 obj
 << /Type /Font
 /Subtype /Type1
 /Name /F6
 /BaseFont /Times-Italic
 /Encoding /WinAnsiEncoding >>
 endobj
-17 0 obj
+10 0 obj
 << /Type /Font
 /Subtype /Type1
 /Name /F1
 /BaseFont /Helvetica
 /Encoding /WinAnsiEncoding >>
 endobj
-18 0 obj
+11 0 obj
 << /Type /Font
 /Subtype /Type1
 /Name /F2
 /BaseFont /Helvetica-Oblique
 /Encoding /WinAnsiEncoding >>
 endobj
-19 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F7
-/BaseFont /Times-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
 1 0 obj
 << /Type /Pages
-/Count 2
-/Kids [6 0 R 11 0 R ] >>
+/Count 1
+/Kids [6 0 R ] >>
 endobj
 2 0 obj
 << /Type /Catalog
 /Pages 1 0 R
- /Outlines 12 0 R
- /PageMode /UseOutlines
  >>
 endobj
 3 0 obj
 << 
-/Font << /F3 14 0 R /F5 15 0 R /F1 17 0 R /F6 16 0 R /F2 18 0 R /F7 19 0 R >> 
+/Font << /F3 7 0 R /F5 8 0 R /F1 10 0 R /F6 9 0 R /F2 11 0 R >> 
 /ProcSet [ /PDF /ImageC /Text ] >> 
 endobj
-9 0 obj
-<<
-/S /GoTo
-/D [11 0 R /XYZ 85.0 659.0 null]
->>
-endobj
-12 0 obj
-<<
- /First 13 0 R
- /Last 13 0 R
->> endobj
 xref
-0 20
+0 12
 0000000000 65535 f 
-0000002665 00000 n 
-0000002730 00000 n 
-0000002822 00000 n 
+0000001630 00000 n 
+0000001688 00000 n 
+0000001738 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
-0000000519 00000 n 
-0000000639 00000 n 
-0000000664 00000 n 
-0000002956 00000 n 
-0000000799 00000 n 
-0000001713 00000 n 
-0000003019 00000 n 
-0000001821 00000 n 
-0000001998 00000 n 
-0000002111 00000 n 
-0000002221 00000 n 
-0000002332 00000 n 
-0000002440 00000 n 
-0000002556 00000 n 
+0000000969 00000 n 
+0000001075 00000 n 
+0000001187 00000 n 
+0000001296 00000 n 
+0000001406 00000 n 
+0000001514 00000 n 
 trailer
 <<
-/Size 20
+/Size 12
 /Root 2 0 R
 /Info 4 0 R
 >>
 startxref
-3070
+1858
 %%EOF

Modified: lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html?rev=611895&r1=611894&r2=611895&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html (original)
+++ lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html Mon Jan 14 11:16:43 2008
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.7">
+<meta name="Forrest-version" content="0.8">
 <meta name="Forrest-skin-name" content="pelt">
 <title>Hadoop Map-Reduce Tutorial</title>
 <link type="text/css" href="skin/basic.css" rel="stylesheet">
@@ -16,46 +16,91 @@
 <body onload="init()">
 <script type="text/javascript">ndeSetTextSize();</script>
 <div id="top">
+<!--+
+    |breadtrail
+    +-->
 <div class="breadtrail">
 <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
+<!--+
+    |header
+    +-->
 <div class="header">
+<!--+
+    |start group logo
+    +-->
 <div class="grouplogo">
 <a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
 <div class="projectlogo">
 <a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
 </div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
 <div class="searchbox">
 <form action="http://www.google.com/search" method="get" class="roundtopsmall">
 <input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input attr="value" name="Search" value="Search" type="submit">
+                    <input name="Search" value="Search" type="submit">
 </form>
 </div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
 <ul id="tabs">
 <li>
-<a class="base-not-selected" href="http://lucene.apache.org/hadoop/">Project</a>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 <li>
-<a class="base-not-selected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
+<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
 <li class="current">
-<a class="base-selected" href="index.html">Hadoop 0.15 Documentation</a>
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
 </li>
 </ul>
+<!--+
+    |end Tabs
+    +-->
 </div>
 </div>
 <div id="main">
 <div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
 <div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
 <script type="text/javascript"><!--
-document.write("<text>Last Published:</text> " + document.lastModified);
+document.write("Last Published: " + document.lastModified);
 //  --></script>
 </div>
+<!--+
+    |breadtrail
+    +-->
 <div class="breadtrail">
-             
+
              &nbsp;
            </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
 <div id="menu">
 <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
 <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
@@ -93,8 +138,17 @@
 <div id="credit"></div>
 <div id="roundbottom">
 <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
 <div id="credit2"></div>
 </div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
 <div id="content">
 <div title="Portable Document Format" class="pdflink">
 <a class="dida" href="mapred_tutorial.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
@@ -211,13 +265,13 @@
 <a href="#Example%3A+WordCount+v2.0">Example: WordCount v2.0</a>
 <ul class="minitoc">
 <li>
-<a href="#Source+Code-N10A91">Source Code</a>
+<a href="#Source+Code-N10AB2">Source Code</a>
 </li>
 <li>
 <a href="#Sample+Runs">Sample Runs</a>
 </li>
 <li>
-<a href="#Salient+Points">Salient Points</a>
+<a href="#Highlights">Highlights</a>
 </li>
 </ul>
 </li>
@@ -354,7 +408,12 @@
 <p>
 <span class="codefrag">WordCount</span> is a simple application that counts the number of
       occurences of each word in a given input set.</p>
-<a name="N100DA"></a><a name="Source+Code"></a>
+<p>This works with a 
+      <a href="quickstart.html#Standalone+Operation">local-standalone</a>,
+      <a href="quickstart.html#SingleNodeSetup">pseudo-distributed</a> or
+      <a href="quickstart.html#Fully-Distributed+Operation">fully-distributed</a> 
+      Hadoop installation.</p>
+<a name="N100E9"></a><a name="Source+Code"></a>
 <h3 class="h4">Source Code</h3>
 <table class="ForrestTable" cellspacing="1" cellpadding="4">
           
@@ -385,7 +444,7 @@
             
 <td colspan="1" rowspan="1">3.</td>
             <td colspan="1" rowspan="1">
-              <span class="codefrag">import java.io.Exception;</span>
+              <span class="codefrag">import java.io.IOException;</span>
             </td>
           
 </tr>
@@ -480,7 +539,7 @@
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">
-                public static class MapClass extends MapReduceBase 
+                public static class Map extends MapReduceBase 
                 implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
               </span>
             </td>
@@ -794,7 +853,7 @@
 <td colspan="1" rowspan="1">45.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">conf.setMapperClass(MapClass.class);</span>
+              <span class="codefrag">conf.setMapperClass(Map.class);</span>
             </td>
           
 </tr>
@@ -858,7 +917,7 @@
 <td colspan="1" rowspan="1">52.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">conf.setInputPath(new Path(args[1]));</span>
+              <span class="codefrag">conf.setInputPath(new Path(args[0]));</span>
             </td>
           
 </tr>
@@ -868,7 +927,7 @@
 <td colspan="1" rowspan="1">53.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">conf.setOutputPath(new Path(args[2]));</span>
+              <span class="codefrag">conf.setOutputPath(new Path(args[1]));</span>
             </td>
           
 </tr>
@@ -917,20 +976,23 @@
 </tr>
         
 </table>
-<a name="N1045C"></a><a name="Usage"></a>
+<a name="N1046B"></a><a name="Usage"></a>
 <h3 class="h4">Usage</h3>
 <p>Assuming <span class="codefrag">HADOOP_HOME</span> is the root of the installation and 
         <span class="codefrag">HADOOP_VERSION</span> is the Hadoop version installed, compile 
         <span class="codefrag">WordCount.java</span> and create a jar:</p>
 <p>
           
+<span class="codefrag">$ mkdir wordcount_classes</span>
+<br>
+          
 <span class="codefrag">
             $ javac -classpath ${HADOOP_HOME}/hadoop-${HADOOP_VERSION}-core.jar 
-              WordCount.java
+              -d wordcount_classes WordCount.java
           </span>
 <br>
           
-<span class="codefrag">$ jar -cvf /usr/joe/wordcount.jar WordCount.class</span> 
+<span class="codefrag">$ jar -cvf /usr/joe/wordcount.jar -C wordcount_classes/ .</span> 
         
 </p>
 <p>Assuming that:</p>
@@ -1009,7 +1071,7 @@
 <br>
         
 </p>
-<a name="N104D8"></a><a name="Walk-through"></a>
+<a name="N104EB"></a><a name="Walk-through"></a>
 <h3 class="h4">Walk-through</h3>
 <p>The <span class="codefrag">WordCount</span> application is quite straight-forward.</p>
 <p>The <span class="codefrag">Mapper</span> implementation (lines 14-26), via the 
@@ -1119,7 +1181,7 @@
 </div>
     
     
-<a name="N1058F"></a><a name="Map-Reduce+-+User+Interfaces"></a>
+<a name="N105A2"></a><a name="Map-Reduce+-+User+Interfaces"></a>
 <h2 class="h3">Map-Reduce - User Interfaces</h2>
 <div class="section">
 <p>This section provides a reasonable amount of detail on every user-facing 
@@ -1138,12 +1200,12 @@
 <p>Finally, we will wrap up by discussing some useful features of the
       framework such as the <span class="codefrag">DistributedCache</span>, 
       <span class="codefrag">IsolationRunner</span> etc.</p>
-<a name="N105C8"></a><a name="Payload"></a>
+<a name="N105DB"></a><a name="Payload"></a>
 <h3 class="h4">Payload</h3>
 <p>Applications typically implement the <span class="codefrag">Mapper</span> and 
         <span class="codefrag">Reducer</span> interfaces to provide the <span class="codefrag">map</span> and 
         <span class="codefrag">reduce</span> methods. These form the core of the job.</p>
-<a name="N105DD"></a><a name="Mapper"></a>
+<a name="N105F0"></a><a name="Mapper"></a>
 <h4>Mapper</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/Mapper.html">
@@ -1199,7 +1261,7 @@
           <a href="api/org/apache/hadoop/io/compress/CompressionCodec.html">
           CompressionCodec</a> to be used via the <span class="codefrag">JobConf</span>.
           </p>
-<a name="N10657"></a><a name="How+Many+Maps%3F"></a>
+<a name="N1066A"></a><a name="How+Many+Maps%3F"></a>
 <h5>How Many Maps?</h5>
 <p>The number of maps is usually driven by the total size of the 
             inputs, that is, the total number of blocks of the input files.</p>
@@ -1212,7 +1274,7 @@
             <a href="api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)">
             setNumMapTasks(int)</a> (which only provides a hint to the framework) 
             is used to set it even higher.</p>
-<a name="N1066F"></a><a name="Reducer"></a>
+<a name="N10682"></a><a name="Reducer"></a>
 <h4>Reducer</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/Reducer.html">
@@ -1235,18 +1297,18 @@
 <p>
 <span class="codefrag">Reducer</span> has 3 primary phases: shuffle, sort and reduce.
           </p>
-<a name="N1069F"></a><a name="Shuffle"></a>
+<a name="N106B2"></a><a name="Shuffle"></a>
 <h5>Shuffle</h5>
 <p>Input to the <span class="codefrag">Reducer</span> is the sorted output of the
             mappers. In this phase the framework fetches the relevant partition 
             of the output of all the mappers, via HTTP.</p>
-<a name="N106AC"></a><a name="Sort"></a>
+<a name="N106BF"></a><a name="Sort"></a>
 <h5>Sort</h5>
 <p>The framework groups <span class="codefrag">Reducer</span> inputs by keys (since 
             different mappers may have output the same key) in this stage.</p>
 <p>The shuffle and sort phases occur simultaneously; while 
             map-outputs are being fetched they are merged.</p>
-<a name="N106BB"></a><a name="Secondary+Sort"></a>
+<a name="N106CE"></a><a name="Secondary+Sort"></a>
 <h5>Secondary Sort</h5>
 <p>If equivalence rules for grouping the intermediate keys are 
               required to be different from those for grouping keys before 
@@ -1257,7 +1319,7 @@
               JobConf.setOutputKeyComparatorClass(Class)</a> can be used to 
               control how intermediate keys are grouped, these can be used in 
               conjunction to simulate <em>secondary sort on values</em>.</p>
-<a name="N106D4"></a><a name="Reduce"></a>
+<a name="N106E7"></a><a name="Reduce"></a>
 <h5>Reduce</h5>
 <p>In this phase the 
             <a href="api/org/apache/hadoop/mapred/Reducer.html#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)">
@@ -1273,7 +1335,7 @@
             progress, set application-level status messages and update 
             <span class="codefrag">Counters</span>, or just indicate that they are alive.</p>
 <p>The output of the <span class="codefrag">Reducer</span> is <em>not sorted</em>.</p>
-<a name="N10702"></a><a name="How+Many+Reduces%3F"></a>
+<a name="N10715"></a><a name="How+Many+Reduces%3F"></a>
 <h5>How Many Reduces?</h5>
 <p>The right number of reduces seems to be <span class="codefrag">0.95</span> or 
             <span class="codefrag">1.75</span> multiplied by (&lt;<em>no. of nodes</em>&gt; * 
@@ -1288,7 +1350,7 @@
 <p>The scaling factors above are slightly less than whole numbers to 
             reserve a few reduce slots in the framework for speculative-tasks and
             failed tasks.</p>
-<a name="N10727"></a><a name="Reducer+NONE"></a>
+<a name="N1073A"></a><a name="Reducer+NONE"></a>
 <h5>Reducer NONE</h5>
 <p>It is legal to set the number of reduce-tasks to <em>zero</em> if 
             no reduction is desired.</p>
@@ -1298,7 +1360,7 @@
             setOutputPath(Path)</a>. The framework does not sort the 
             map-outputs before writing them out to the <span class="codefrag">FileSystem</span>.
             </p>
-<a name="N10742"></a><a name="Partitioner"></a>
+<a name="N10755"></a><a name="Partitioner"></a>
 <h4>Partitioner</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/Partitioner.html">
@@ -1312,7 +1374,7 @@
 <p>
 <a href="api/org/apache/hadoop/mapred/lib/HashPartitioner.html">
           HashPartitioner</a> is the default <span class="codefrag">Partitioner</span>.</p>
-<a name="N10761"></a><a name="Reporter"></a>
+<a name="N10774"></a><a name="Reporter"></a>
 <h4>Reporter</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/Reporter.html">
@@ -1331,7 +1393,7 @@
           </p>
 <p>Applications can also update <span class="codefrag">Counters</span> using the 
           <span class="codefrag">Reporter</span>.</p>
-<a name="N1078B"></a><a name="OutputCollector"></a>
+<a name="N1079E"></a><a name="OutputCollector"></a>
 <h4>OutputCollector</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/OutputCollector.html">
@@ -1342,7 +1404,7 @@
 <p>Hadoop Map-Reduce comes bundled with a 
         <a href="api/org/apache/hadoop/mapred/lib/package-summary.html">
         library</a> of generally useful mappers, reducers, and partitioners.</p>
-<a name="N107A6"></a><a name="Job+Configuration"></a>
+<a name="N107B9"></a><a name="Job+Configuration"></a>
 <h3 class="h4">Job Configuration</h3>
 <p>
 <a href="api/org/apache/hadoop/mapred/JobConf.html">
@@ -1397,7 +1459,7 @@
         <a href="api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String, java.lang.String)">set(String, String)</a>/<a href="api/org/apache/hadoop/conf/Configuration.html#get(java.lang.String, java.lang.String)">get(String, String)</a>
         to set/get arbitrary parameters needed by applications. However, use the 
         <span class="codefrag">DistributedCache</span> for large amounts of (read-only) data.</p>
-<a name="N1082C"></a><a name="Job+Submission+and+Monitoring"></a>
+<a name="N1083F"></a><a name="Job+Submission+and+Monitoring"></a>
 <h3 class="h4">Job Submission and Monitoring</h3>
 <p>
 <a href="api/org/apache/hadoop/mapred/JobClient.html">
@@ -1433,7 +1495,7 @@
 <p>Normally the user creates the application, describes various facets 
         of the job via <span class="codefrag">JobConf</span>, and then uses the 
         <span class="codefrag">JobClient</span> to submit the job and monitor its progress.</p>
-<a name="N1086A"></a><a name="Job+Control"></a>
+<a name="N1087D"></a><a name="Job+Control"></a>
 <h4>Job Control</h4>
 <p>Users may need to chain map-reduce jobs to accomplish complex
           tasks which cannot be done via a single map-reduce job. This is fairly
@@ -1469,7 +1531,7 @@
             </li>
           
 </ul>
-<a name="N10894"></a><a name="Job+Input"></a>
+<a name="N108A7"></a><a name="Job+Input"></a>
 <h3 class="h4">Job Input</h3>
 <p>
 <a href="api/org/apache/hadoop/mapred/InputFormat.html">
@@ -1512,7 +1574,7 @@
 <a href="api/org/apache/hadoop/mapred/TextInputFormat.html">
         TextInputFormat</a> is the default <span class="codefrag">InputFormat</span>.
         </p>
-<a name="N108E9"></a><a name="InputSplit"></a>
+<a name="N108FC"></a><a name="InputSplit"></a>
 <h4>InputSplit</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/InputSplit.html">
@@ -1526,7 +1588,7 @@
           FileSplit</a> is the default <span class="codefrag">InputSplit</span>. It sets 
           <span class="codefrag">map.input.file</span> to the path of the input file for the
           logical split.</p>
-<a name="N1090E"></a><a name="RecordReader"></a>
+<a name="N10921"></a><a name="RecordReader"></a>
 <h4>RecordReader</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/RecordReader.html">
@@ -1538,7 +1600,7 @@
           for processing. <span class="codefrag">RecordReader</span> thus assumes the 
           responsibility of processing record boundaries and presents the tasks 
           with keys and values.</p>
-<a name="N10931"></a><a name="Job+Output"></a>
+<a name="N10944"></a><a name="Job+Output"></a>
 <h3 class="h4">Job Output</h3>
 <p>
 <a href="api/org/apache/hadoop/mapred/OutputFormat.html">
@@ -1563,7 +1625,7 @@
 <p>
 <span class="codefrag">TextOutputFormat</span> is the default 
         <span class="codefrag">OutputFormat</span>.</p>
-<a name="N1095A"></a><a name="Task+Side-Effect+Files"></a>
+<a name="N1096D"></a><a name="Task+Side-Effect+Files"></a>
 <h4>Task Side-Effect Files</h4>
 <p>In some applications, component tasks need to create and/or write to
           side-files, which differ from the actual job-output files.</p>
@@ -1589,7 +1651,7 @@
           JobConf.getOutputPath()</a>, and the framework will promote them 
           similarly for succesful task-attempts, thus eliminating the need to 
           pick unique paths per task-attempt.</p>
-<a name="N1098F"></a><a name="RecordWriter"></a>
+<a name="N109A2"></a><a name="RecordWriter"></a>
 <h4>RecordWriter</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/RecordWriter.html">
@@ -1597,9 +1659,9 @@
           pairs to an output file.</p>
 <p>RecordWriter implementations write the job outputs to the 
           <span class="codefrag">FileSystem</span>.</p>
-<a name="N109A6"></a><a name="Other+Useful+Features"></a>
+<a name="N109B9"></a><a name="Other+Useful+Features"></a>
 <h3 class="h4">Other Useful Features</h3>
-<a name="N109AC"></a><a name="Counters"></a>
+<a name="N109BF"></a><a name="Counters"></a>
 <h4>Counters</h4>
 <p>
 <span class="codefrag">Counters</span> represent global counters, defined either by 
@@ -1613,7 +1675,7 @@
           Reporter.incrCounter(Enum, long)</a> in the <span class="codefrag">map</span> and/or 
           <span class="codefrag">reduce</span> methods. These counters are then globally 
           aggregated by the framework.</p>
-<a name="N109D7"></a><a name="DistributedCache"></a>
+<a name="N109EA"></a><a name="DistributedCache"></a>
 <h4>DistributedCache</h4>
 <p>
 <a href="api/org/apache/hadoop/filecache/DistributedCache.html">
@@ -1645,7 +1707,7 @@
 <span class="codefrag">DistributedCache</span> tracks the modification timestamps of 
           the cached files. Clearly the cache files should not be modified by 
           the application or externally while the job is executing.</p>
-<a name="N10A11"></a><a name="Tool"></a>
+<a name="N10A24"></a><a name="Tool"></a>
 <h4>Tool</h4>
 <p>The <a href="api/org/apache/hadoop/util/Tool.html">Tool</a> 
           interface supports the handling of generic Hadoop command-line options.
@@ -1685,7 +1747,7 @@
             </span>
           
 </p>
-<a name="N10A43"></a><a name="IsolationRunner"></a>
+<a name="N10A56"></a><a name="IsolationRunner"></a>
 <h4>IsolationRunner</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/IsolationRunner.html">
@@ -1709,7 +1771,7 @@
 <p>
 <span class="codefrag">IsolationRunner</span> will run the failed task in a single 
           jvm, which can be in the debugger, over precisely the same input.</p>
-<a name="N10A76"></a><a name="JobControl"></a>
+<a name="N10A89"></a><a name="JobControl"></a>
 <h4>JobControl</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/jobcontrol/package-summary.html">
@@ -1718,12 +1780,17 @@
 </div>
 
     
-<a name="N10A85"></a><a name="Example%3A+WordCount+v2.0"></a>
+<a name="N10A98"></a><a name="Example%3A+WordCount+v2.0"></a>
 <h2 class="h3">Example: WordCount v2.0</h2>
 <div class="section">
 <p>Here is a more complete <span class="codefrag">WordCount</span> which uses many of the
-      features provided by the Map-Reduce framework we discussed so far:</p>
-<a name="N10A91"></a><a name="Source+Code-N10A91"></a>
+      features provided by the Map-Reduce framework we discussed so far.</p>
+<p>This needs the HDFS to be up and running, especially for the 
+      <span class="codefrag">DistributedCache</span>-related features. Hence it only works with a 
+      <a href="quickstart.html#SingleNodeSetup">pseudo-distributed</a> or
+      <a href="quickstart.html#Fully-Distributed+Operation">fully-distributed</a> 
+      Hadoop installation.</p>
+<a name="N10AB2"></a><a name="Source+Code-N10AB2"></a>
 <h3 class="h4">Source Code</h3>
 <table class="ForrestTable" cellspacing="1" cellpadding="4">
           
@@ -1858,7 +1925,7 @@
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">
-                public static class MapClass extends MapReduceBase 
+                public static class Map extends MapReduceBase 
                 implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
               </span>
             </td>
@@ -2018,7 +2085,7 @@
 <td colspan="1" rowspan="1">32.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">Path[] patternsFiles = new Path[0];</span>
+              <span class="codefrag">if (job.getBoolean("wordcount.skip.patterns", false)) {</span>
             </td>
           
 </tr>
@@ -2027,8 +2094,8 @@
             
 <td colspan="1" rowspan="1">33.</td>
             <td colspan="1" rowspan="1">
-              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">try {</span>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">Path[] patternsFiles = new Path[0];</span>
             </td>
           
 </tr>
@@ -2038,6 +2105,16 @@
 <td colspan="1" rowspan="1">34.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">try {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">35.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
                 patternsFiles = DistributedCache.getLocalCacheFiles(job);
               </span>
@@ -2047,9 +2124,9 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">35.</td>
+<td colspan="1" rowspan="1">36.</td>
             <td colspan="1" rowspan="1">
-              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">} catch (IOException ioe) {</span>
             </td>
           
@@ -2057,9 +2134,9 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">36.</td>
+<td colspan="1" rowspan="1">37.</td>
             <td colspan="1" rowspan="1">
-              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
                 System.err.println("Caught exception while getting cached files: " 
                 + StringUtils.stringifyException(ioe));
@@ -2070,9 +2147,9 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">37.</td>
+<td colspan="1" rowspan="1">38.</td>
             <td colspan="1" rowspan="1">
-              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
             </td>
           
@@ -2080,9 +2157,9 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">38.</td>
+<td colspan="1" rowspan="1">39.</td>
             <td colspan="1" rowspan="1">
-              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">for (Path patternsFile : patternsFiles) {</span>
             </td>
           
@@ -2090,9 +2167,9 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">39.</td>
+<td colspan="1" rowspan="1">40.</td>
             <td colspan="1" rowspan="1">
-              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">parseSkipFile(patternsFile);</span>
             </td>
           
@@ -2100,7 +2177,17 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">40.</td>
+<td colspan="1" rowspan="1">41.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">42.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2110,7 +2197,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">41.</td>
+<td colspan="1" rowspan="1">43.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2120,14 +2207,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">42.</td>
+<td colspan="1" rowspan="1">44.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">43.</td>
+<td colspan="1" rowspan="1">45.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">private void parseSkipFile(Path patternsFile) {</span>
@@ -2137,7 +2224,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">44.</td>
+<td colspan="1" rowspan="1">46.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">try {</span>
@@ -2147,7 +2234,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">45.</td>
+<td colspan="1" rowspan="1">47.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2160,7 +2247,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">46.</td>
+<td colspan="1" rowspan="1">48.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">String pattern = null;</span>
@@ -2170,7 +2257,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">47.</td>
+<td colspan="1" rowspan="1">49.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">while ((pattern = fis.readLine()) != null) {</span>
@@ -2180,7 +2267,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">48.</td>
+<td colspan="1" rowspan="1">50.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">patternsToSkip.add(pattern);</span>
@@ -2190,7 +2277,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">49.</td>
+<td colspan="1" rowspan="1">51.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2200,7 +2287,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">50.</td>
+<td colspan="1" rowspan="1">52.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">} catch (IOException ioe) {</span>
@@ -2210,7 +2297,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">51.</td>
+<td colspan="1" rowspan="1">53.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2225,7 +2312,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">52.</td>
+<td colspan="1" rowspan="1">54.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2235,7 +2322,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">53.</td>
+<td colspan="1" rowspan="1">55.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2245,14 +2332,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">54.</td>
+<td colspan="1" rowspan="1">56.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">55.</td>
+<td colspan="1" rowspan="1">57.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2266,7 +2353,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">56.</td>
+<td colspan="1" rowspan="1">58.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2280,14 +2367,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">57.</td>
+<td colspan="1" rowspan="1">59.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">58.</td>
+<td colspan="1" rowspan="1">60.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">for (String pattern : patternsToSkip) {</span>
@@ -2297,7 +2384,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">59.</td>
+<td colspan="1" rowspan="1">61.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">line = line.replaceAll(pattern, "");</span>
@@ -2307,7 +2394,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">60.</td>
+<td colspan="1" rowspan="1">62.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2317,14 +2404,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">61.</td>
+<td colspan="1" rowspan="1">63.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">62.</td>
+<td colspan="1" rowspan="1">64.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">StringTokenizer tokenizer = new StringTokenizer(line);</span>
@@ -2334,7 +2421,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">63.</td>
+<td colspan="1" rowspan="1">65.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">while (tokenizer.hasMoreTokens()) {</span>
@@ -2344,7 +2431,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">64.</td>
+<td colspan="1" rowspan="1">66.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">word.set(tokenizer.nextToken());</span>
@@ -2354,7 +2441,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">65.</td>
+<td colspan="1" rowspan="1">67.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">output.collect(word, one);</span>
@@ -2364,7 +2451,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">66.</td>
+<td colspan="1" rowspan="1">68.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">reporter.incrCounter(Counters.INPUT_WORDS, 1);</span>
@@ -2374,7 +2461,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">67.</td>
+<td colspan="1" rowspan="1">69.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2384,14 +2471,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">68.</td>
+<td colspan="1" rowspan="1">70.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">69.</td>
+<td colspan="1" rowspan="1">71.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">if ((++numRecords % 100) == 0) {</span>
@@ -2401,7 +2488,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">70.</td>
+<td colspan="1" rowspan="1">72.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2415,7 +2502,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">71.</td>
+<td colspan="1" rowspan="1">73.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2425,7 +2512,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">72.</td>
+<td colspan="1" rowspan="1">74.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2435,7 +2522,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">73.</td>
+<td colspan="1" rowspan="1">75.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2445,14 +2532,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">74.</td>
+<td colspan="1" rowspan="1">76.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">75.</td>
+<td colspan="1" rowspan="1">77.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">
@@ -2465,7 +2552,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">76.</td>
+<td colspan="1" rowspan="1">78.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2479,7 +2566,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">77.</td>
+<td colspan="1" rowspan="1">79.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">int sum = 0;</span>
@@ -2489,7 +2576,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">78.</td>
+<td colspan="1" rowspan="1">80.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">while (values.hasNext()) {</span>
@@ -2499,7 +2586,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">79.</td>
+<td colspan="1" rowspan="1">81.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">sum += values.next().get();</span>
@@ -2509,7 +2596,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">80.</td>
+<td colspan="1" rowspan="1">82.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2519,7 +2606,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">81.</td>
+<td colspan="1" rowspan="1">83.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">output.collect(key, new IntWritable(sum));</span>
@@ -2529,7 +2616,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">82.</td>
+<td colspan="1" rowspan="1">84.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2539,7 +2626,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">83.</td>
+<td colspan="1" rowspan="1">85.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2549,14 +2636,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">84.</td>
+<td colspan="1" rowspan="1">86.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">85.</td>
+<td colspan="1" rowspan="1">87.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">public int run(String[] args) throws Exception {</span>
@@ -2566,7 +2653,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">86.</td>
+<td colspan="1" rowspan="1">88.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2578,7 +2665,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">87.</td>
+<td colspan="1" rowspan="1">89.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setJobName("wordcount");</span>
@@ -2588,14 +2675,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">88.</td>
+<td colspan="1" rowspan="1">90.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">89.</td>
+<td colspan="1" rowspan="1">91.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setOutputKeyClass(Text.class);</span>
@@ -2605,7 +2692,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">90.</td>
+<td colspan="1" rowspan="1">92.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setOutputValueClass(IntWritable.class);</span>
@@ -2615,24 +2702,24 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">91.</td>
+<td colspan="1" rowspan="1">93.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">92.</td>
+<td colspan="1" rowspan="1">94.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">conf.setMapperClass(MapClass.class);</span>
+              <span class="codefrag">conf.setMapperClass(Map.class);</span>
             </td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">93.</td>
+<td colspan="1" rowspan="1">95.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setCombinerClass(Reduce.class);</span>
@@ -2642,7 +2729,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">94.</td>
+<td colspan="1" rowspan="1">96.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setReducerClass(Reduce.class);</span>
@@ -2652,14 +2739,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">95.</td>
+<td colspan="1" rowspan="1">97.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">96.</td>
+<td colspan="1" rowspan="1">98.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setInputFormat(TextInputFormat.class);</span>
@@ -2669,7 +2756,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">97.</td>
+<td colspan="1" rowspan="1">99.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">conf.setOutputFormat(TextOutputFormat.class);</span>
@@ -2679,14 +2766,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">98.</td>
+<td colspan="1" rowspan="1">100.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">99.</td>
+<td colspan="1" rowspan="1">101.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2698,7 +2785,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">100.</td>
+<td colspan="1" rowspan="1">102.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">for (int i=0; i &lt; args.length; ++i) {</span>
@@ -2708,17 +2795,17 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">101.</td>
+<td colspan="1" rowspan="1">103.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">if ("-skip".equals(args[i]) {</span>
+              <span class="codefrag">if ("-skip".equals(args[i])) {</span>
             </td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">102.</td>
+<td colspan="1" rowspan="1">104.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2730,7 +2817,19 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">103.</td>
+<td colspan="1" rowspan="1">105.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                conf.setBoolean("wordcount.skip.patterns", true);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">106.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">} else {</span>
@@ -2740,7 +2839,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">104.</td>
+<td colspan="1" rowspan="1">107.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">other_args.add(args[i]);</span>
@@ -2750,7 +2849,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">105.</td>
+<td colspan="1" rowspan="1">108.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2760,7 +2859,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">106.</td>
+<td colspan="1" rowspan="1">109.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2770,41 +2869,41 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">107.</td>
+<td colspan="1" rowspan="1">110.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">108.</td>
+<td colspan="1" rowspan="1">111.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">conf.setInputPath(new Path(other_args[0]));</span>
+              <span class="codefrag">conf.setInputPath(new Path(other_args.get(0)));</span>
             </td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">109.</td>
+<td colspan="1" rowspan="1">112.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
-              <span class="codefrag">conf.setOutputPath(new Path(other_args[1]));</span>
+              <span class="codefrag">conf.setOutputPath(new Path(other_args.get(1)));</span>
             </td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">110.</td>
+<td colspan="1" rowspan="1">113.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">111.</td>
+<td colspan="1" rowspan="1">114.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">JobClient.runJob(conf);</span>
@@ -2814,7 +2913,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">112.</td>
+<td colspan="1" rowspan="1">115.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">return 0;</span>
@@ -2824,7 +2923,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">113.</td>
+<td colspan="1" rowspan="1">116.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2834,14 +2933,14 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">114.</td>
+<td colspan="1" rowspan="1">117.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
           
 <tr>
             
-<td colspan="1" rowspan="1">115.</td>
+<td colspan="1" rowspan="1">118.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">
@@ -2853,7 +2952,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">116.</td>
+<td colspan="1" rowspan="1">119.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">
@@ -2866,7 +2965,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">117.</td>
+<td colspan="1" rowspan="1">120.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;&nbsp;&nbsp;
               <span class="codefrag">System.exit(res);</span>
@@ -2876,7 +2975,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">118.</td>
+<td colspan="1" rowspan="1">121.</td>
             <td colspan="1" rowspan="1">
               &nbsp;&nbsp;
               <span class="codefrag">}</span>
@@ -2886,7 +2985,7 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">119.</td>
+<td colspan="1" rowspan="1">122.</td>
             <td colspan="1" rowspan="1">
               <span class="codefrag">}</span>
             </td>
@@ -2895,13 +2994,13 @@
           
 <tr>
             
-<td colspan="1" rowspan="1">120.</td>
+<td colspan="1" rowspan="1">123.</td>
             <td colspan="1" rowspan="1"></td>
           
 </tr>
         
 </table>
-<a name="N111C3"></a><a name="Sample+Runs"></a>
+<a name="N11214"></a><a name="Sample+Runs"></a>
 <h3 class="h4">Sample Runs</h3>
 <p>Sample text-files as input:</p>
 <p>
@@ -2928,7 +3027,7 @@
 <span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</span>
 <br>
           
-<span class="codefrag">Hello Hadoop, Goodbye the Hadoop.</span>
+<span class="codefrag">Hello Hadoop, Goodbye to hadoop.</span>
         
 </p>
 <p>Run the application:</p>
@@ -2958,9 +3057,6 @@
 <span class="codefrag">Hadoop,    1</span>
 <br>
           
-<span class="codefrag">Hadoop.    1</span>
-<br>
-          
 <span class="codefrag">Hello    2</span>
 <br>
           
@@ -2970,7 +3066,10 @@
 <span class="codefrag">World,    1</span>
 <br>
           
-<span class="codefrag">the    1</span>
+<span class="codefrag">hadoop.    1</span>
+<br>
+          
+<span class="codefrag">to    1</span>
 <br>
         
 </p>
@@ -2992,7 +3091,7 @@
 <span class="codefrag">\!</span>
 <br>
           
-<span class="codefrag">the</span>
+<span class="codefrag">to</span>
 <br>
         
 </p>
@@ -3021,7 +3120,7 @@
 <span class="codefrag">Goodbye    1</span>
 <br>
           
-<span class="codefrag">Hadoop    2</span>
+<span class="codefrag">Hadoop    1</span>
 <br>
           
 <span class="codefrag">Hello    2</span>
@@ -3029,6 +3128,9 @@
           
 <span class="codefrag">World    2</span>
 <br>
+          
+<span class="codefrag">hadoop    1</span>
+<br>
         
 </p>
 <p>Run it once more, this time switch-off case-sensitivity:</p>
@@ -3066,8 +3168,8 @@
 <br>
         
 </p>
-<a name="N11293"></a><a name="Salient+Points"></a>
-<h3 class="h4">Salient Points</h3>
+<a name="N112E8"></a><a name="Highlights"></a>
+<h3 class="h4">Highlights</h3>
 <p>The second version of <span class="codefrag">WordCount</span> improves upon the 
         previous one by using some features offered by the Map-Reduce framework:
         </p>
@@ -3076,26 +3178,26 @@
 <li>
             Demonstrates how applications can access configuration parameters
             in the <span class="codefrag">configure</span> method of the <span class="codefrag">Mapper</span> (and
-            <span class="codefrag">Reducer</span>) implementations (lines 28-41).
+            <span class="codefrag">Reducer</span>) implementations (lines 28-43).
           </li>
           
 <li>
             Demonstrates how the <span class="codefrag">DistributedCache</span> can be used to 
             distribute read-only data needed by the jobs. Here it allows the user 
-            to specify word-patterns to skip while counting (line 102).
+            to specify word-patterns to skip while counting (line 104).
           </li>
           
 <li>
             Demonstrates the utility of the <span class="codefrag">Tool</span> interface and the
             <span class="codefrag">GenericOptionsParser</span> to handle generic Hadoop 
-            command-line options (lines 85-86, 116).
+            command-line options (lines 87-116, 119).
           </li>
           
 <li>
-            Demonstrates how applications can use <span class="codefrag">Counters</span> (line 66)
+            Demonstrates how applications can use <span class="codefrag">Counters</span> (line 68)
             and how they can set application-specific status information via 
             the <span class="codefrag">Reporter</span> instance passed to the <span class="codefrag">map</span> (and
-            <span class="codefrag">reduce</span>) method (line 70).
+            <span class="codefrag">reduce</span>) method (line 72).
           </li>
         
 </ul>
@@ -3111,18 +3213,27 @@
     
   
 </div>
+<!--+
+    |end content
+    +-->
 <div class="clearboth">&nbsp;</div>
 </div>
 <div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
 <div class="lastmodified">
 <script type="text/javascript"><!--
-document.write("<text>Last Published:</text> " + document.lastModified);
+document.write("Last Published: " + document.lastModified);
 //  --></script>
 </div>
 <div class="copyright">
         Copyright &copy;
          2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
 </div>
+<!--+
+    |end bottomstrip
+    +-->
 </div>
 </body>
 </html>



Mime
View raw message