Return-Path: X-Original-To: apmail-avro-commits-archive@www.apache.org Delivered-To: apmail-avro-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 572286B08 for ; Mon, 30 May 2011 08:31:53 +0000 (UTC) Received: (qmail 19925 invoked by uid 500); 30 May 2011 08:31:53 -0000 Delivered-To: apmail-avro-commits-archive@avro.apache.org Received: (qmail 19899 invoked by uid 500); 30 May 2011 08:31:53 -0000 Mailing-List: contact commits-help@avro.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@avro.apache.org Delivered-To: mailing list commits@avro.apache.org Received: (qmail 19891 invoked by uid 99); 30 May 2011 08:31:53 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 May 2011 08:31:53 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 May 2011 08:31:50 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id D503523889E1; Mon, 30 May 2011 08:31:28 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1129056 - in /avro/branches/branch-1.5: ./ lang/java/mapred/src/main/java/org/apache/avro/mapred/ lang/java/mapred/src/test/java/org/apache/avro/mapred/ Date: Mon, 30 May 2011 08:31:28 -0000 To: commits@avro.apache.org From: cutting@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110530083128.D503523889E1@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cutting Date: Mon May 30 08:31:28 2011 New Revision: 1129056 URL: http://svn.apache.org/viewvc?rev=1129056&view=rev Log: Merge -c 1129053 from trunk to 1.5 branch. Fixes: AVRO-830. Added: avro/branches/branch-1.5/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroTextOutputFormat.java - copied unchanged from r1129053, avro/trunk/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroTextOutputFormat.java avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java - copied unchanged from r1129053, avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextSort.java - copied unchanged from r1129053, avro/trunk/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextSort.java Removed: avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroAsTextInputFormat.java Modified: avro/branches/branch-1.5/ (props changed) avro/branches/branch-1.5/CHANGES.txt avro/branches/branch-1.5/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/WordCountUtil.java Propchange: avro/branches/branch-1.5/ ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Mon May 30 08:31:28 2011 @@ -1 +1 @@ -/avro/trunk:1075938,1075993,1078917,1079055,1079060,1079063,1083246,1085921,1086727,1086730,1086866,1087076,1087129,1087136,1087439-1087440,1087463,1087472,1087792,1089128,1089131,1089550,1094812,1095206-1095208,1095493,1095529,1095548,1095550,1096798,1097916,1097927,1097968,1097974,1102332,1102335,1124127,1124971 +/avro/trunk:1075938,1075993,1078917,1079055,1079060,1079063,1083246,1085921,1086727,1086730,1086866,1087076,1087129,1087136,1087439-1087440,1087463,1087472,1087792,1089128,1089131,1089550,1094812,1095206-1095208,1095493,1095529,1095548,1095550,1096798,1097916,1097927,1097968,1097974,1102332,1102335,1124127,1124971,1129053 Modified: avro/branches/branch-1.5/CHANGES.txt URL: http://svn.apache.org/viewvc/avro/branches/branch-1.5/CHANGES.txt?rev=1129056&r1=1129055&r2=1129056&view=diff ============================================================================== --- avro/branches/branch-1.5/CHANGES.txt (original) +++ avro/branches/branch-1.5/CHANGES.txt Mon May 30 08:31:28 2011 @@ -4,6 +4,10 @@ Avro 1.5.2 (unreleased) NEW FEATURES + AVRO-830. Java: Add AvroTextOutputFormat to permit Hadoop + streaming jobs to easily write Avro format output with "bytes" as + schema. (Tom White via cutting) + IMPROVEMENTS AVRO-820. Java: Permit applications to catch exceptions thrown Modified: avro/branches/branch-1.5/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java URL: http://svn.apache.org/viewvc/avro/branches/branch-1.5/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java?rev=1129056&r1=1129055&r2=1129056&view=diff ============================================================================== --- avro/branches/branch-1.5/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java (original) +++ avro/branches/branch-1.5/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java Mon May 30 08:31:28 2011 @@ -19,6 +19,7 @@ package org.apache.avro.mapred; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Map; import java.net.URLDecoder; @@ -64,21 +65,9 @@ public class AvroOutputFormat public static void setSyncInterval(JobConf job, int syncIntervalInBytes) { job.setInt(SYNC_INTERVAL_KEY, syncIntervalInBytes); } - - @Override - public RecordWriter, NullWritable> - getRecordWriter(FileSystem ignore, JobConf job, - String name, Progressable prog) - throws IOException { - - boolean isMapOnly = job.getNumReduceTasks() == 0; - Schema schema = isMapOnly - ? AvroJob.getMapOutputSchema(job) - : AvroJob.getOutputSchema(job); - - final DataFileWriter writer = - new DataFileWriter(new ReflectDatumWriter()); - + + static void configureDataFileWriter(DataFileWriter writer, + JobConf job) throws UnsupportedEncodingException { if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC); @@ -87,7 +76,7 @@ public class AvroOutputFormat : CodecFactory.fromString(codecName); writer.setCodec(factory); } - + writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job @@ -100,6 +89,23 @@ public class AvroOutputFormat URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } + } + + @Override + public RecordWriter, NullWritable> + getRecordWriter(FileSystem ignore, JobConf job, + String name, Progressable prog) + throws IOException { + + boolean isMapOnly = job.getNumReduceTasks() == 0; + Schema schema = isMapOnly + ? AvroJob.getMapOutputSchema(job) + : AvroJob.getOutputSchema(job); + + final DataFileWriter writer = + new DataFileWriter(new ReflectDatumWriter()); + + configureDataFileWriter(writer, job); Path path = FileOutputFormat.getTaskOutputPath(job, name+EXT); writer.create(schema, path.getFileSystem(job).create(path)); Modified: avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/WordCountUtil.java URL: http://svn.apache.org/viewvc/avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/WordCountUtil.java?rev=1129056&r1=1129055&r2=1129056&view=diff ============================================================================== --- avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/WordCountUtil.java (original) +++ avro/branches/branch-1.5/lang/java/mapred/src/test/java/org/apache/avro/mapred/WordCountUtil.java Mon May 30 08:31:28 2011 @@ -44,6 +44,7 @@ import org.apache.avro.Schema; import org.apache.avro.util.Utf8; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; +import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.file.DataFileWriter; @@ -60,7 +61,7 @@ class WordCountUtil { private static final File COUNTS_FILE = new File(new File(DIR, "out"), "part-00000.avro"); private static final File SORTED_FILE - = new File(new File(DIR, "out"), "part-00000"); + = new File(new File(DIR, "out"), "part-00000.avro"); public static final String[] LINES = new String[] { "the quick brown fox jumps over the lazy dog", @@ -131,18 +132,25 @@ class WordCountUtil { } public static void validateSortedFile() throws Exception { - BufferedReader reader = new BufferedReader(new FileReader(SORTED_FILE)); + DatumReader reader = new GenericDatumReader(); + InputStream in = new BufferedInputStream( + new FileInputStream(SORTED_FILE)); + DataFileStream lines = + new DataFileStream(in,reader); List sortedLines = new ArrayList(); for (String line : LINES) { sortedLines.add(line); } Collections.sort(sortedLines); for (String expectedLine : sortedLines) { - assertEquals(expectedLine, reader.readLine().trim()); + ByteBuffer buf = lines.next(); + byte[] b = new byte[buf.remaining()]; + buf.get(b); + assertEquals(expectedLine, new String(b, "UTF-8").trim()); } - assertNull(reader.readLine()); + assertFalse(lines.hasNext()); } - + // metadata tests private static final String STRING_KEY = "string-key"; private static final String LONG_KEY = "long-key";