Return-Path: Delivered-To: apmail-lucene-hadoop-commits-archive@locus.apache.org Received: (qmail 72859 invoked from network); 11 Apr 2007 19:38:42 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 11 Apr 2007 19:38:42 -0000 Received: (qmail 70824 invoked by uid 500); 11 Apr 2007 19:38:48 -0000 Delivered-To: apmail-lucene-hadoop-commits-archive@lucene.apache.org Received: (qmail 70809 invoked by uid 500); 11 Apr 2007 19:38:48 -0000 Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hadoop-dev@lucene.apache.org Delivered-To: mailing list hadoop-commits@lucene.apache.org Received: (qmail 70800 invoked by uid 99); 11 Apr 2007 19:38:48 -0000 Received: from herse.apache.org (HELO herse.apache.org) (140.211.11.133) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 Apr 2007 12:38:48 -0700 X-ASF-Spam-Status: No, hits=-99.5 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 Apr 2007 12:38:41 -0700 Received: by eris.apache.org (Postfix, from userid 65534) id 61BA11A9838; Wed, 11 Apr 2007 12:38:21 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r527632 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/TextOutputFormat.java src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java src/test/org/apache/hadoop/mapred/UtilsForTests.java Date: Wed, 11 Apr 2007 19:38:21 -0000 To: hadoop-commits@lucene.apache.org From: cutting@apache.org X-Mailer: svnmailer-1.1.0 Message-Id: <20070411193821.61BA11A9838@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cutting Date: Wed Apr 11 12:38:20 2007 New Revision: 527632 URL: http://svn.apache.org/viewvc?view=rev&rev=527632 Log: HADOOP-819. Change LineRecordWriter to not insert a tab between key and value when either is null, and to print nothing when both are null. Contributed by Runping Qi. Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=527632&r1=527631&r2=527632 ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Wed Apr 11 12:38:20 2007 @@ -165,6 +165,10 @@ 50. HADOOP-1189. Fix 'No space left on device' exceptions on datanodes. (Raghu Angadi via tomwhite) +51. HADOOP-819. Change LineRecordWriter to not insert a tab between + key and value when either is null, and to print nothing when both + are null. (Runping Qi via cutting) + Release 0.12.3 - 2007-04-06 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java?view=diff&rev=527632&r1=527631&r2=527632 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java Wed Apr 11 12:38:20 2007 @@ -42,15 +42,26 @@ } public synchronized void write(WritableComparable key, Writable value) - throws IOException { - out.write(key.toString().getBytes("UTF-8")); - out.writeByte('\t'); - out.write(value.toString().getBytes("UTF-8")); + throws IOException { + + if (key == null && value == null) { + return; + } + if (key != null) { + out.write(key.toString().getBytes("UTF-8")); + } + if (key != null && value != null) { + out.write("\t".getBytes("UTF-8")); + } + if (value != null) { + out.write(value.toString().getBytes("UTF-8")); + } out.writeByte('\n'); } + public synchronized void close(Reporter reporter) throws IOException { out.close(); - } + } } public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java?view=auto&rev=527632 ============================================================================== --- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java (added) +++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java Wed Apr 11 12:38:20 2007 @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapred; + +import java.io.*; +import java.util.*; +import junit.framework.TestCase; + +import org.apache.commons.logging.*; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.*; +import org.apache.hadoop.io.compress.*; +import org.apache.hadoop.util.ReflectionUtils; + +public class TestTextOutputFormat extends TestCase { + private static final Log LOG = LogFactory.getLog(TestTextOutputFormat.class + .getName()); + + private static JobConf defaultConf = new JobConf(); + + private static FileSystem localFs = null; + static { + try { + localFs = FileSystem.getLocal(defaultConf); + } catch (IOException e) { + throw new RuntimeException("init failure", e); + } + } + + private static Path workDir = new Path(new Path(System.getProperty( + "test.build.data", "."), "data"), "TestTextOutputFormat"); + + public void testFormat() throws Exception { + JobConf job = new JobConf(); + job.setOutputPath(workDir); + String file = "test.txt"; + + // A reporter that does nothing + Reporter reporter = Reporter.NULL; + + TextOutputFormat theOutputFormat = new TextOutputFormat(); + RecordWriter theRecodWriter = theOutputFormat.getRecordWriter(localFs, job, + file, reporter); + + Text key1 = new Text("key1"); + Text key2 = new Text("key2"); + Text val1 = new Text("val1"); + Text val2 = new Text("val2"); + + try { + theRecodWriter.write(key1, val1); + theRecodWriter.write(null, val1); + theRecodWriter.write(key1, null); + theRecodWriter.write(null, null); + theRecodWriter.write(key2, val2); + + } finally { + theRecodWriter.close(reporter); + } + File expectedFile = new File(new Path(workDir, file).toString()); + StringBuffer expectedOutput = new StringBuffer(); + expectedOutput.append(key1).append('\t').append(val1).append("\n"); + expectedOutput.append(val1).append("\n"); + expectedOutput.append(key1).append("\n"); + expectedOutput.append(key2).append('\t').append(val2).append("\n"); + String output = UtilsForTests.slurp(expectedFile); + assertEquals(output, expectedOutput.toString()); + + } + + public static void main(String[] args) throws Exception { + new TestTextOutputFormat().testFormat(); + } +} Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java?view=auto&rev=527632 ============================================================================== --- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java (added) +++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java Wed Apr 11 12:38:20 2007 @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapred; + +import java.text.DecimalFormat; +import java.io.*; +import java.net.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.List; +import java.util.jar.*; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; + +/** + * Utilities used in unit test. + * + */ +public class UtilsForTests { + + final static long KB = 1024L * 1; + final static long MB = 1024L * KB; + final static long GB = 1024L * MB; + final static long TB = 1024L * GB; + final static long PB = 1024L * TB; + + static DecimalFormat dfm = new DecimalFormat("####.000"); + static DecimalFormat ifm = new DecimalFormat("###,###,###,###,###"); + + public static String dfmt(double d) { + return dfm.format(d); + } + + public static String ifmt(double d) { + return ifm.format(d); + } + + public static String formatBytes(long numBytes) { + StringBuffer buf = new StringBuffer(); + boolean bDetails = true; + double num = numBytes; + + if (numBytes < KB) { + buf.append(numBytes + " B"); + bDetails = false; + } else if (numBytes < MB) { + buf.append(dfmt(num / KB) + " KB"); + } else if (numBytes < GB) { + buf.append(dfmt(num / MB) + " MB"); + } else if (numBytes < TB) { + buf.append(dfmt(num / GB) + " GB"); + } else if (numBytes < PB) { + buf.append(dfmt(num / TB) + " TB"); + } else { + buf.append(dfmt(num / PB) + " PB"); + } + if (bDetails) { + buf.append(" (" + ifmt(numBytes) + " bytes)"); + } + return buf.toString(); + } + + public static String formatBytes2(long numBytes) { + StringBuffer buf = new StringBuffer(); + long u = 0; + if (numBytes >= TB) { + u = numBytes / TB; + numBytes -= u * TB; + buf.append(u + " TB "); + } + if (numBytes >= GB) { + u = numBytes / GB; + numBytes -= u * GB; + buf.append(u + " GB "); + } + if (numBytes >= MB) { + u = numBytes / MB; + numBytes -= u * MB; + buf.append(u + " MB "); + } + if (numBytes >= KB) { + u = numBytes / KB; + numBytes -= u * KB; + buf.append(u + " KB "); + } + buf.append(u + " B"); //even if zero + return buf.toString(); + } + + static final String regexpSpecials = "[]()?*+|.!^-\\~@"; + + public static String regexpEscape(String plain) { + StringBuffer buf = new StringBuffer(); + char[] ch = plain.toCharArray(); + int csup = ch.length; + for (int c = 0; c < csup; c++) { + if (regexpSpecials.indexOf(ch[c]) != -1) { + buf.append("\\"); + } + buf.append(ch[c]); + } + return buf.toString(); + } + + public static String safeGetCanonicalPath(File f) { + try { + String s = f.getCanonicalPath(); + return (s == null) ? f.toString() : s; + } catch (IOException io) { + return f.toString(); + } + } + + static String slurp(File f) throws IOException { + int len = (int) f.length(); + byte[] buf = new byte[len]; + FileInputStream in = new FileInputStream(f); + String contents = null; + try { + in.read(buf, 0, len); + contents = new String(buf, "UTF-8"); + } finally { + in.close(); + } + return contents; + } + + static String slurpHadoop(Path p, FileSystem fs) throws IOException { + int len = (int) fs.getLength(p); + byte[] buf = new byte[len]; + InputStream in = fs.open(p); + String contents = null; + try { + in.read(buf, 0, len); + contents = new String(buf, "UTF-8"); + } finally { + in.close(); + } + return contents; + } + + public static String rjustify(String s, int width) { + if (s == null) s = "null"; + if (width > s.length()) { + s = getSpace(width - s.length()) + s; + } + return s; + } + + public static String ljustify(String s, int width) { + if (s == null) s = "null"; + if (width > s.length()) { + s = s + getSpace(width - s.length()); + } + return s; + } + + static char[] space; + static { + space = new char[300]; + Arrays.fill(space, '\u0020'); + } + + public static String getSpace(int len) { + if (len > space.length) { + space = new char[Math.max(len, 2 * space.length)]; + Arrays.fill(space, '\u0020'); + } + return new String(space, 0, len); + } +}