Return-Path: X-Original-To: apmail-accumulo-commits-archive@www.apache.org Delivered-To: apmail-accumulo-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A122D107E1 for ; Wed, 19 Mar 2014 16:09:19 +0000 (UTC) Received: (qmail 99981 invoked by uid 500); 19 Mar 2014 16:08:21 -0000 Delivered-To: apmail-accumulo-commits-archive@accumulo.apache.org Received: (qmail 99739 invoked by uid 500); 19 Mar 2014 16:08:17 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 99572 invoked by uid 99); 19 Mar 2014 16:08:12 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 19 Mar 2014 16:08:12 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id E386B984618; Wed, 19 Mar 2014 16:08:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ujustgotbilld@apache.org To: commits@accumulo.apache.org Date: Wed, 19 Mar 2014 16:08:11 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [01/50] [abbrv] git commit: ACCUMULO-375 added compression and increased the minimum split size Repository: accumulo-wikisearch Updated Branches: refs/heads/master f11759eb5 -> 1990979f6 ACCUMULO-375 added compression and increased the minimum split size git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1241940 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/57bf9cf3 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/57bf9cf3 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/57bf9cf3 Branch: refs/heads/master Commit: 57bf9cf34b891c12d4361e8d5224a2aa38b63a6c Parents: 266455b Author: Adam Fuchs Authored: Wed Feb 8 15:37:04 2012 +0000 Committer: Adam Fuchs Committed: Wed Feb 8 15:37:04 2012 +0000 ---------------------------------------------------------------------- .../examples/wikisearch/ingest/WikipediaPartitionedIngester.java | 4 ++++ 1 file changed, 4 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/57bf9cf3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java index 43f5e29..5571290 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; @@ -178,6 +179,8 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class); Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf); SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir); + SequenceFileOutputFormat.setCompressOutput(partitionerJob, true); + SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD); return partitionerJob.waitForCompletion(true) ? 0 : 1; } @@ -209,6 +212,7 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { // setup input format ingestJob.setInputFormatClass(SequenceFileInputFormat.class); SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf)); + SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28); // setup output format ingestJob.setMapOutputKeyClass(Text.class);