incubator-accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From afu...@apache.org
Subject svn commit: r1241624 - /incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
Date Tue, 07 Feb 2012 21:02:37 GMT
Author: afuchs
Date: Tue Feb  7 21:02:36 2012
New Revision: 1241624

URL: http://svn.apache.org/viewvc?rev=1241624&view=rev
Log:
ACCUMULO-375 fixed bugs in job setup

Modified:
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java?rev=1241624&r1=1241623&r2=1241624&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java Tue Feb  7 21:02:36 2012
@@ -173,6 +173,8 @@ public class WikipediaPartitionedIngeste
     // setup output format
     partitionerJob.setMapOutputKeyClass(Text.class);
     partitionerJob.setMapOutputValueClass(Article.class);
+    partitionerJob.setOutputKeyClass(Text.class);
+    partitionerJob.setOutputValueClass(Article.class);
     partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
     Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
     SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
@@ -186,6 +188,8 @@ public class WikipediaPartitionedIngeste
     Configuration ingestConf = ingestJob.getConfiguration();
     ingestConf.set("mapred.map.tasks.speculative.execution", "false");
 
+    configureIngestJob(ingestJob);
+    
     String tablename = WikipediaConfiguration.getTableName(ingestConf);
     
     String zookeepers = WikipediaConfiguration.getZookeepers(ingestConf);
@@ -199,6 +203,9 @@ public class WikipediaPartitionedIngeste
     
     createTables(tops, tablename);
     
+    ingestJob.setMapperClass(WikipediaPartitionedMapper.class);
+    ingestJob.setNumReduceTasks(0);
+    
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
@@ -227,6 +234,11 @@ public class WikipediaPartitionedIngeste
     conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
     conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
   }
+
+  protected void configureIngestJob(Job job) {
+    job.setJarByClass(WikipediaPartitionedIngester.class);
+    job.setInputFormatClass(WikipediaInputFormat.class);
+  }
   
   protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?");
   



Mime
View raw message