Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm
Precedence: bulk
Reply-To: hadoop-dev@lucene.apache.org
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r527632 - in /lucene/hadoop/trunk: CHANGES.txt
 src/java/org/apache/hadoop/mapred/TextOutputFormat.java
 src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java
 src/test/org/apache/hadoop/mapred/UtilsForTests.java
Date: Wed, 11 Apr 2007 19:38:21 -0000
To: hadoop-commits@lucene.apache.org
From: cutting@apache.org
Message-Id: <20070411193821.61BA11A9838@eris.apache.org>

Author: cutting
Date: Wed Apr 11 12:38:20 2007
New Revision: 527632

URL: http://svn.apache.org/viewvc?view=rev&rev=527632
Log:
HADOOP-819.  Change LineRecordWriter to not insert a tab between key and value when either is null, and to print nothing when both are null.  Contributed by Runping Qi.

Added:
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=527632&r1=527631&r2=527632
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Apr 11 12:38:20 2007
@@ -165,6 +165,10 @@
 50. HADOOP-1189.  Fix 'No space left on device' exceptions on datanodes.
     (Raghu Angadi via tomwhite)
 
+51. HADOOP-819.  Change LineRecordWriter to not insert a tab between
+    key and value when either is null, and to print nothing when both
+    are null.  (Runping Qi via cutting)
+
 
 Release 0.12.3 - 2007-04-06
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java?view=diff&rev=527632&r1=527631&r2=527632
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java Wed Apr 11 12:38:20 2007
@@ -42,15 +42,26 @@
     }
     
     public synchronized void write(WritableComparable key, Writable value)
-    throws IOException {
-      out.write(key.toString().getBytes("UTF-8"));
-      out.writeByte('\t');
-      out.write(value.toString().getBytes("UTF-8"));
+        throws IOException {
+
+      if (key == null && value == null) {
+        return;
+      }
+      if (key != null) {
+        out.write(key.toString().getBytes("UTF-8"));
+      }
+      if (key != null && value != null) {
+        out.write("\t".getBytes("UTF-8"));
+      }
+      if (value != null) {
+        out.write(value.toString().getBytes("UTF-8"));
+      }
       out.writeByte('\n');
     }
+
     public synchronized void close(Reporter reporter) throws IOException {
       out.close();
-    }   
+    }
   }
   
   public RecordWriter getRecordWriter(FileSystem ignored, JobConf job,

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java?view=auto&rev=527632
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java Wed Apr 11 12:38:20 2007
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.*;
+import java.util.*;
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.io.compress.*;
+import org.apache.hadoop.util.ReflectionUtils;
+
+public class TestTextOutputFormat extends TestCase {
+  private static final Log LOG = LogFactory.getLog(TestTextOutputFormat.class
+      .getName());
+
+  private static JobConf defaultConf = new JobConf();
+
+  private static FileSystem localFs = null;
+  static {
+    try {
+      localFs = FileSystem.getLocal(defaultConf);
+    } catch (IOException e) {
+      throw new RuntimeException("init failure", e);
+    }
+  }
+
+  private static Path workDir = new Path(new Path(System.getProperty(
+      "test.build.data", "."), "data"), "TestTextOutputFormat");
+
+  /** Writes all four null/non-null key/value combinations and checks the file text. */
+  public void testFormat() throws Exception {
+    JobConf job = new JobConf();
+    job.setOutputPath(workDir);
+    String file = "test.txt";
+    // A reporter that does nothing
+    Reporter reporter = Reporter.NULL;
+
+    TextOutputFormat theOutputFormat = new TextOutputFormat();
+    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job,
+        file, reporter);
+
+    Text key1 = new Text("key1");
+    Text key2 = new Text("key2");
+    Text val1 = new Text("val1");
+    Text val2 = new Text("val2");
+
+    try {
+      theRecordWriter.write(key1, val1);
+      theRecordWriter.write(null, val1);   // expected: value only, no tab
+      theRecordWriter.write(key1, null);   // expected: key only, no tab
+      theRecordWriter.write(null, null);   // expected: no line at all
+      theRecordWriter.write(key2, val2);
+    } finally {
+      theRecordWriter.close(reporter);
+    }
+
+    File outputFile = new File(new Path(workDir, file).toString());
+    StringBuffer expectedOutput = new StringBuffer();
+    expectedOutput.append(key1).append('\t').append(val1).append("\n");
+    expectedOutput.append(val1).append("\n");
+    expectedOutput.append(key1).append("\n");
+    expectedOutput.append(key2).append('\t').append(val2).append("\n");
+    String output = UtilsForTests.slurp(outputFile);
+    // JUnit's assertEquals takes (expected, actual); keep failure messages truthful
+    assertEquals(expectedOutput.toString(), output);
+  }
+
+  public static void main(String[] args) throws Exception {
+    new TestTextOutputFormat().testFormat();
+  }
+}

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java?view=auto&rev=527632
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/UtilsForTests.java Wed Apr 11 12:38:20 2007
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.text.DecimalFormat;
+import java.io.*;
+import java.net.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.List;
+import java.util.jar.*;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+/** 
+ * Utilities used in unit test.
+ *  
+ */
+public class UtilsForTests {
+  // Size units in bytes; each is 1024 times the previous one.
+  final static long KB = 1024L * 1;
+  final static long MB = 1024L * KB;
+  final static long GB = 1024L * MB;
+  final static long TB = 1024L * GB;
+  final static long PB = 1024L * TB;
+
+  static DecimalFormat dfm = new DecimalFormat("####.000");
+  static DecimalFormat ifm = new DecimalFormat("###,###,###,###,###");
+  /** Formats d with the "####.000" pattern. */
+  public static String dfmt(double d) {
+    return dfm.format(d);
+  }
+  /** Formats d with the "###,###,###,###,###" pattern. */
+  public static String ifmt(double d) {
+    return ifm.format(d);
+  }
+  /** Renders numBytes in the largest unit it reaches, B through PB. */
+  public static String formatBytes(long numBytes) {
+    StringBuffer buf = new StringBuffer();
+    boolean bDetails = true;
+    double num = numBytes;
+
+    if (numBytes < KB) {
+      buf.append(numBytes + " B");
+      bDetails = false;
+    } else if (numBytes < MB) {
+      buf.append(dfmt(num / KB) + " KB");
+    } else if (numBytes < GB) {
+      buf.append(dfmt(num / MB) + " MB");
+    } else if (numBytes < TB) {
+      buf.append(dfmt(num / GB) + " GB");
+    } else if (numBytes < PB) {
+      buf.append(dfmt(num / TB) + " TB");
+    } else {
+      buf.append(dfmt(num / PB) + " PB");
+    }
+    if (bDetails) {
+      buf.append(" (" + ifmt(numBytes) + " bytes)");
+    }
+    return buf.toString();
+  }
+
+  /** Formats numBytes as whole TB/GB/MB/KB units plus leftover bytes, e.g. "1 KB 5 B". */
+  public static String formatBytes2(long numBytes) {
+    StringBuffer buf = new StringBuffer();
+    if (numBytes >= TB) {
+      long u = numBytes / TB;
+      numBytes -= u * TB;
+      buf.append(u + " TB ");
+    }
+    if (numBytes >= GB) {
+      long u = numBytes / GB;
+      numBytes -= u * GB;
+      buf.append(u + " GB ");
+    }
+    if (numBytes >= MB) {
+      long u = numBytes / MB;
+      numBytes -= u * MB;
+      buf.append(u + " MB ");
+    }
+    if (numBytes >= KB) {
+      long u = numBytes / KB;
+      numBytes -= u * KB;
+      buf.append(u + " KB ");
+    }
+    buf.append(numBytes + " B"); // remainder below 1 KB, printed even if zero
+    return buf.toString();
+  }
+
+  static final String regexpSpecials = "[]()?*+|.!^-\\~@"; // NOTE(review): no '$', '{', '}' - confirm
+  /** Prefixes a backslash to every character of plain that occurs in regexpSpecials. */
+  public static String regexpEscape(String plain) {
+    StringBuffer buf = new StringBuffer();
+    char[] ch = plain.toCharArray();
+    int csup = ch.length;
+    for (int c = 0; c < csup; c++) {
+      if (regexpSpecials.indexOf(ch[c]) != -1) {
+        buf.append("\\");
+      }
+      buf.append(ch[c]);
+    }
+    return buf.toString();
+  }
+  /** Returns f's canonical path, or f.toString() when that is null or throws IOException. */
+  public static String safeGetCanonicalPath(File f) {
+    try {
+      String s = f.getCanonicalPath();
+      return (s == null) ? f.toString() : s;
+    } catch (IOException io) {
+      return f.toString();
+    }
+  }
+
+  /** Reads the whole local file f and decodes its bytes as UTF-8. */
+  static String slurp(File f) throws IOException {
+    byte[] buf = new byte[(int) f.length()];
+    DataInputStream in = new DataInputStream(new FileInputStream(f));
+    try {
+      // A single read() may return fewer bytes than requested and leave the
+      // tail of buf zero-filled; readFully keeps reading until buf is full.
+      in.readFully(buf);
+      return new String(buf, "UTF-8");
+    } finally {
+      in.close();
+    }
+  }
+
+  /** Reads the whole file p from fs and decodes its bytes as UTF-8. */
+  static String slurpHadoop(Path p, FileSystem fs) throws IOException {
+    byte[] buf = new byte[(int) fs.getLength(p)];
+    DataInputStream in = new DataInputStream(fs.open(p));
+    try {
+      // A single read() may return fewer bytes than requested and leave the
+      // tail of buf zero-filled; readFully keeps reading until buf is full.
+      in.readFully(buf);
+      return new String(buf, "UTF-8");
+    } finally {
+      in.close();
+    }
+  }
+  /** Left-pads s (or "null" when s is null) with spaces up to width characters. */
+  public static String rjustify(String s, int width) {
+    if (s == null) s = "null";
+    if (width > s.length()) {
+      s = getSpace(width - s.length()) + s;
+    }
+    return s;
+  }
+  /** Right-pads s (or "null" when s is null) with spaces up to width characters. */
+  public static String ljustify(String s, int width) {
+    if (s == null) s = "null";
+    if (width > s.length()) {
+      s = s + getSpace(width - s.length());
+    }
+    return s;
+  }
+
+  static char[] space;
+  static {
+    space = new char[300];
+    Arrays.fill(space, '\u0020');
+  }
+  /** Returns a string of len spaces, enlarging the shared space buffer when it is too short. */
+  public static String getSpace(int len) {
+    if (len > space.length) {
+      space = new char[Math.max(len, 2 * space.length)];
+      Arrays.fill(space, '\u0020');
+    }
+    return new String(space, 0, len);
+  }
+}