hadoop-common-commits mailing list archives

From: acmur...@apache.org
Subject: svn commit: r597172 - in /lucene/hadoop/trunk: CHANGES.txt src/examples/pipes/Makefile.am src/examples/pipes/Makefile.in src/examples/pipes/impl/sort.cc
Date: Wed, 21 Nov 2007 18:30:57 GMT
Author: acmurthy
Date: Wed Nov 21 10:30:56 2007
New Revision: 597172

URL: http://svn.apache.org/viewvc?rev=597172&view=rev
Log:
HADOOP-2127. Added a pipes sort example to benchmark a trivial pipes application against a
trivial Java application. Contributed by Owen O'Malley.

Added:
    lucene/hadoop/trunk/src/examples/pipes/impl/sort.cc
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/examples/pipes/Makefile.am
    lucene/hadoop/trunk/src/examples/pipes/Makefile.in

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=597172&r1=597171&r2=597172&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Nov 21 10:30:56 2007
@@ -63,6 +63,9 @@
     HADOOP-2104. Adds a description to the ant targets. This makes the 
     output of "ant -projecthelp" sensible. (Chris Douglas via ddas)
 
+    HADOOP-2127. Added a pipes sort example to benchmark trivial pipes
+    application versus trivial java application. (omalley via acmurthy)
+
   OPTIMIZATIONS
 
     HADOOP-1898.  Release the lock protecting the last time of the last stack

Modified: lucene/hadoop/trunk/src/examples/pipes/Makefile.am
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/Makefile.am?rev=597172&r1=597171&r2=597172&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/pipes/Makefile.am (original)
+++ lucene/hadoop/trunk/src/examples/pipes/Makefile.am Wed Nov 21 10:30:56 2007
@@ -19,7 +19,7 @@
 LDADD=-L$(HADOOP_UTILS_PREFIX)/lib -L$(HADOOP_PIPES_PREFIX)/lib \
       -lhadooppipes -lhadooputils
 
-bin_PROGRAMS= wordcount-simple wordcount-part wordcount-nopipe
+bin_PROGRAMS= wordcount-simple wordcount-part wordcount-nopipe pipes-sort
 
 # Define the sources for each program
 wordcount_simple_SOURCES = \
@@ -30,4 +30,7 @@
 
 wordcount_nopipe_SOURCES = \
 	impl/wordcount-nopipe.cc
+
+pipes_sort_SOURCES = \
+        impl/sort.cc
 

Modified: lucene/hadoop/trunk/src/examples/pipes/Makefile.in
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/Makefile.in?rev=597172&r1=597171&r2=597172&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/examples/pipes/Makefile.in (original)
+++ lucene/hadoop/trunk/src/examples/pipes/Makefile.in Wed Nov 21 10:30:56 2007
@@ -14,7 +14,7 @@
 
 @SET_MAKE@
 
-SOURCES = $(wordcount_nopipe_SOURCES) $(wordcount_part_SOURCES) $(wordcount_simple_SOURCES)
+SOURCES = $(pipes_sort_SOURCES) $(wordcount_nopipe_SOURCES) $(wordcount_part_SOURCES) $(wordcount_simple_SOURCES)
 
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
@@ -38,11 +38,12 @@
 POST_UNINSTALL = :
 host_triplet = @host@
 bin_PROGRAMS = wordcount-simple$(EXEEXT) wordcount-part$(EXEEXT) \
-	wordcount-nopipe$(EXEEXT)
-DIST_COMMON = config.guess config.sub $(srcdir)/Makefile.in \
-	$(srcdir)/Makefile.am $(top_srcdir)/configure \
-	$(am__configure_deps) $(top_srcdir)/impl/config.h.in depcomp \
-	ltmain.sh config.guess config.sub
+	wordcount-nopipe$(EXEEXT) pipes-sort$(EXEEXT)
+DIST_COMMON = config.guess config.guess config.sub config.sub \
+	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+	$(top_srcdir)/configure $(am__configure_deps) \
+	$(top_srcdir)/impl/config.h.in depcomp depcomp ltmain.sh \
+	ltmain.sh config.guess config.guess config.sub config.sub
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps =  \
@@ -59,6 +60,10 @@
 binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
 PROGRAMS = $(bin_PROGRAMS)
 am__dirstamp = $(am__leading_dot)dirstamp
+am_pipes_sort_OBJECTS = impl/sort.$(OBJEXT)
+pipes_sort_OBJECTS = $(am_pipes_sort_OBJECTS)
+pipes_sort_LDADD = $(LDADD)
+pipes_sort_DEPENDENCIES =
 am_wordcount_nopipe_OBJECTS = impl/wordcount-nopipe.$(OBJEXT)
 wordcount_nopipe_OBJECTS = $(am_wordcount_nopipe_OBJECTS)
 wordcount_nopipe_LDADD = $(LDADD)
@@ -82,8 +87,8 @@
 CXXLD = $(CXX)
 CXXLINK = $(LIBTOOL) --mode=link --tag=CXX $(CXXLD) $(AM_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = $(wordcount_nopipe_SOURCES) $(wordcount_part_SOURCES) \
-	$(wordcount_simple_SOURCES)
+SOURCES = $(pipes_sort_SOURCES) $(wordcount_nopipe_SOURCES) \
+	$(wordcount_part_SOURCES) $(wordcount_simple_SOURCES)
 ETAGS = etags
 CTAGS = ctags
 ACLOCAL = @ACLOCAL@
@@ -218,6 +223,9 @@
 wordcount_nopipe_SOURCES = \
 	impl/wordcount-nopipe.cc
 
+pipes_sort_SOURCES = \
+        impl/sort.cc
+
 all: all-am
 
 .SUFFIXES:
@@ -306,6 +314,11 @@
 impl/$(DEPDIR)/$(am__dirstamp):
 	@$(mkdir_p) impl/$(DEPDIR)
 	@: > impl/$(DEPDIR)/$(am__dirstamp)
+impl/sort.$(OBJEXT): impl/$(am__dirstamp) \
+	impl/$(DEPDIR)/$(am__dirstamp)
+pipes-sort$(EXEEXT): $(pipes_sort_OBJECTS) $(pipes_sort_DEPENDENCIES) 
+	@rm -f pipes-sort$(EXEEXT)
+	$(CXXLINK) $(pipes_sort_LDFLAGS) $(pipes_sort_OBJECTS) $(pipes_sort_LDADD) $(LIBS)
 impl/wordcount-nopipe.$(OBJEXT): impl/$(am__dirstamp) \
 	impl/$(DEPDIR)/$(am__dirstamp)
 wordcount-nopipe$(EXEEXT): $(wordcount_nopipe_OBJECTS) $(wordcount_nopipe_DEPENDENCIES) 
@@ -324,6 +337,7 @@
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
+	-rm -f impl/sort.$(OBJEXT)
 	-rm -f impl/wordcount-nopipe.$(OBJEXT)
 	-rm -f impl/wordcount-part.$(OBJEXT)
 	-rm -f impl/wordcount-simple.$(OBJEXT)
@@ -331,6 +345,7 @@
 distclean-compile:
 	-rm -f *.tab.c
 
+@AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/sort.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-nopipe.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-part.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-simple.Po@am__quote@

Added: lucene/hadoop/trunk/src/examples/pipes/impl/sort.cc
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/impl/sort.cc?rev=597172&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/examples/pipes/impl/sort.cc (added)
+++ lucene/hadoop/trunk/src/examples/pipes/impl/sort.cc Wed Nov 21 10:30:56 2007
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hadoop/Pipes.hh"
+#include "hadoop/TemplateFactory.hh"
+
+class SortMap: public HadoopPipes::Mapper {
+private:
+  /* the fraction 0.0 to 1.0 of records to keep */
+  float keepFraction;
+  /* the number of records kept so far */
+  long long keptRecords;
+  /* the total number of records */
+  long long totalRecords;
+  static const std::string MAP_KEEP_PERCENT;
+public:
+  /*
+   * Look in the config to find the fraction of records to keep.
+   */
+  SortMap(HadoopPipes::TaskContext& context){
+    const HadoopPipes::JobConf* conf = context.getJobConf();
+    if (conf->hasKey(MAP_KEEP_PERCENT)) {
+      keepFraction = conf->getFloat(MAP_KEEP_PERCENT) / 100.0;
+    } else {
+      keepFraction = 1.0;
+    }
+    keptRecords = 0;
+    totalRecords = 0;
+  }
+
+  void map(HadoopPipes::MapContext& context) {
+    totalRecords += 1;
+    while ((float) keptRecords / totalRecords < keepFraction) {
+      keptRecords += 1;
+      context.emit(context.getInputKey(), context.getInputValue());
+    }
+  }
+};
+
+const std::string SortMap::MAP_KEEP_PERCENT("hadoop.sort.map.keep.percent");
+
+class SortReduce: public HadoopPipes::Reducer {
+private:
+  /* the fraction 0.0 to 1.0 of records to keep */
+  float keepFraction;
+  /* the number of records kept so far */
+  long long keptRecords;
+  /* the total number of records */
+  long long totalRecords;
+  static const std::string REDUCE_KEEP_PERCENT;
+public:
+  SortReduce(HadoopPipes::TaskContext& context){
+    const HadoopPipes::JobConf* conf = context.getJobConf();
+    if (conf->hasKey(REDUCE_KEEP_PERCENT)) {
+      keepFraction = conf->getFloat(REDUCE_KEEP_PERCENT) / 100.0;
+    } else {
+      keepFraction = 1.0;
+    }
+    keptRecords = 0;
+    totalRecords = 0;
+  }
+
+  void reduce(HadoopPipes::ReduceContext& context) {
+    while (context.nextValue()) {
+      totalRecords += 1;
+      while ((float) keptRecords / totalRecords < keepFraction) {
+        keptRecords += 1;
+        context.emit(context.getInputKey(), context.getInputValue());
+      }
+    }
+  }
+};
+
+const std::string 
+  SortReduce::REDUCE_KEEP_PERCENT("hadoop.sort.reduce.keep.percent");
+
+int main(int argc, char *argv[]) {
+  return HadoopPipes::runTask(HadoopPipes::TemplateFactory<SortMap,
+                                                           SortReduce>());
+}
+
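For readers skimming the diff, the following is a minimal standalone sketch (not part of r597172) of the keep-fraction bookkeeping used by SortMap and SortReduce above. It assumes plain stdin/stdout in place of the Pipes MapContext/ReduceContext, and takes the keep percentage as a command-line argument standing in for the hadoop.sort.map.keep.percent job setting; with no argument it keeps every record, matching the example's default.

    // Illustrative sketch only -- not part of the commit. Mirrors the
    // kept/total sampling rule from SortMap::map, with plain line I/O
    // in place of the Pipes context and argv[1] standing in for
    // hadoop.sort.map.keep.percent.
    #include <cstdlib>
    #include <iostream>
    #include <string>

    int main(int argc, char* argv[]) {
      // Default to keeping 100% of the records, as the example does when
      // the configuration key is absent.
      float keepFraction = (argc > 1) ? std::atof(argv[1]) / 100.0f : 1.0f;
      long long keptRecords = 0;
      long long totalRecords = 0;
      std::string line;
      while (std::getline(std::cin, line)) {
        totalRecords += 1;
        // Emit while the kept/total ratio trails the configured fraction,
        // exactly as SortMap::map does before calling context.emit().
        while ((float) keptRecords / totalRecords < keepFraction) {
          keptRecords += 1;
          std::cout << line << '\n';  // stands in for context.emit(key, value)
        }
      }
      return 0;
    }

Piping a file through this with an argument of, say, 50 keeps roughly half of the lines; the keep-percent settings give the benchmark the same knob for scaling the volume of records emitted by the map and reduce stages.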


