incubator-accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From afu...@apache.org
Subject svn commit: r1245684 - in /incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest: WikipediaConfiguration.java WikipediaPartitionedIngester.java
Date Fri, 17 Feb 2012 18:00:57 GMT
Author: afuchs
Date: Fri Feb 17 18:00:57 2012
New Revision: 1245684

URL: http://svn.apache.org/viewvc?rev=1245684&view=rev
Log:
ACCUMULO-375 made min input split size configurable

Modified:
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java?rev=1245684&r1=1245683&r2=1245684&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java Fri Feb 17 18:00:57 2012
@@ -56,6 +56,7 @@ public class WikipediaConfiguration {
   public final static String BULK_INGEST_DIR = "wikipedia.bulk.ingest.dir";
   public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir";
   public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size";
+  public final static String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size";
   
   
   public static String getUser(Configuration conf) {
@@ -130,6 +131,10 @@ public class WikipediaConfiguration {
     return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY));
   }
   
+  public static long getMinInputSplitSize(Configuration conf) {
+    return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1l << 27);
+  }
+
   public static boolean runPartitioner(Configuration conf) {
     return conf.getBoolean(RUN_PARTITIONER, false);
   }

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java?rev=1245684&r1=1245683&r2=1245684&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java Fri Feb 17 18:00:57 2012
@@ -217,7 +217,8 @@ public class WikipediaPartitionedIngeste
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
-    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28);
+    // TODO make split size configurable
+    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf));
 
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);



Mime
View raw message