accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ujustgotbi...@apache.org
Subject [22/50] git commit: ACCUMULO-375 made min input split size configurable
Date Thu, 06 Feb 2014 05:40:01 GMT
ACCUMULO-375 made min input split size configurable

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245684 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e24faaf9
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e24faaf9
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e24faaf9

Branch: refs/heads/1.4.5-SNAPSHOT
Commit: e24faaf99b05f5c1d68bf07444043ef9bf5ba048
Parents: 2e366aa
Author: Adam Fuchs <afuchs@apache.org>
Authored: Fri Feb 17 18:00:57 2012 +0000
Committer: Adam Fuchs <afuchs@apache.org>
Committed: Fri Feb 17 18:00:57 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/ingest/WikipediaConfiguration.java      | 5 +++++
 .../wikisearch/ingest/WikipediaPartitionedIngester.java         | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e24faaf9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
index a84d90c..27a28a1 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java
@@ -56,6 +56,7 @@ public class WikipediaConfiguration {
   public final static String BULK_INGEST_DIR = "wikipedia.bulk.ingest.dir";
   public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir";
   public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size";
+  public final static String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size";
   
   
   public static String getUser(Configuration conf) {
@@ -130,6 +131,10 @@ public class WikipediaConfiguration {
     return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY));
   }
   
+  public static long getMinInputSplitSize(Configuration conf) {
+    return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1l << 27);
+  }
+
   public static boolean runPartitioner(Configuration conf) {
     return conf.getBoolean(RUN_PARTITIONER, false);
   }

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e24faaf9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index bcdee43..90b8308 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -217,7 +217,8 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
-    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28);
+    // TODO make split size configurable
+    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf));
 
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);


Mime
View raw message