hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r428862 - in /lucene/hadoop/trunk: CHANGES.txt src/examples/python/ src/examples/python/WordCount.py src/examples/python/compile
Date Fri, 04 Aug 2006 20:01:48 GMT
Author: cutting
Date: Fri Aug  4 13:01:48 2006
New Revision: 428862

URL: http://svn.apache.org/viewvc?rev=428862&view=rev
Log:
HADOOP-425.  Add a Python MapReduce example, using Jython.  Contributed by Owen.

Added:
    lucene/hadoop/trunk/src/examples/python/
    lucene/hadoop/trunk/src/examples/python/WordCount.py
    lucene/hadoop/trunk/src/examples/python/compile
Modified:
    lucene/hadoop/trunk/CHANGES.txt

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=428862&r1=428861&r2=428862&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Aug  4 13:01:48 2006
@@ -142,6 +142,9 @@
 40. HADOOP-226.  Fix fsck command to properly consider replication
     counts, now that these can vary per file.  (Bryan Pendleton via cutting)
 
+41. HADOOP-425.  Add a Python MapReduce example, using Jython.
+    (omalley via cutting)
+
 
 Release 0.4.0 - 2006-06-28
 

Added: lucene/hadoop/trunk/src/examples/python/WordCount.py
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/python/WordCount.py?rev=428862&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/python/WordCount.py (added)
+++ lucene/hadoop/trunk/src/examples/python/WordCount.py Fri Aug  4 13:01:48 2006
@@ -0,0 +1,68 @@
+#
+# Copyright 2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from org.apache.hadoop.fs import Path
+from org.apache.hadoop.io import *
+from org.apache.hadoop.mapred import *
+
+import sys
+import getopt
+
+class WordCountMap(Mapper, MapReduceBase):
+    one = IntWritable(1)
+    def map(self, key, value, output, reporter):
+        for w in value.toString().split():
+            output.collect(Text(w), self.one)
+
+class Summer(Reducer, MapReduceBase):
+    def reduce(self, key, values, output, reporter):
+        sum = 0
+        while values.hasNext():
+            sum += values.next().get()
+        output.collect(key, IntWritable(sum))
+
+def printUsage(code):
+    print "wordcount [-m <maps>] [-r <reduces>] <input> <output>"
+    sys.exit(code)
+
+def main(args):
+    conf = JobConf(WordCountMap);
+    conf.setJobName("wordcount");
+ 
+    conf.setOutputKeyClass(Text);
+    conf.setOutputValueClass(IntWritable);
+    
+    conf.setMapperClass(WordCountMap);        
+    conf.setCombinerClass(Summer);
+    conf.setReducerClass(Summer);
+    try:
+        flags, other_args = getopt.getopt(args[1:], "m:r:")
+    except getopt.GetoptError:
+        printUsage(1)
+    if len(other_args) != 2:
+        printUsage(1)
+    
+    for f,v in flags:
+        if f == "-m":
+            conf.setNumMapTasks(int(v))
+        elif f == "-r":
+            conf.setNumReduceTasks(int(v))
+    conf.setInputPath(Path(other_args[0]))
+    conf.setOutputPath(Path(other_args[1]))
+    JobClient.runJob(conf);
+
+if __name__ == "__main__":
+    main(sys.argv)

Added: lucene/hadoop/trunk/src/examples/python/compile
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/python/compile?rev=428862&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/python/compile (added)
+++ lucene/hadoop/trunk/src/examples/python/compile Fri Aug  4 13:01:48 2006
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+export HADOOP_HOME=../../..
+
+export CLASSPATH="$HADOOP_HOME/build/classes"
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# add libs to CLASSPATH
+for f in $HADOOP_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# restore ordinary behaviour
+unset IFS
+jythonc -p org.apache.hadoop.examples -d -j wc.jar -c WordCount.py



Mime
View raw message