hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhr...@apache.org
Subject svn commit: r597211 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/fs/FsShell.java src/test/org/apache/hadoop/dfs/TestDFSShell.java
Date Wed, 21 Nov 2007 21:14:34 GMT
Author: dhruba
Date: Wed Nov 21 13:14:33 2007
New Revision: 597211

URL: http://svn.apache.org/viewvc?rev=597211&view=rev
Log:
HADOOP-2113. A new shell command "dfs -text" to view the contents of
a gzipped or SequenceFile. (Chris Douglas via dhruba)


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FsShell.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestDFSShell.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=597211&r1=597210&r2=597211&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Nov 21 13:14:33 2007
@@ -72,6 +72,9 @@
     HADOOP-2127. Added a pipes sort example to benchmark trivial pipes
     application versus trivial java application. (omalley via acmurthy)
 
+    HADOOP-2113. A new shell command "dfs -text" to view the contents of
+    a gziped or SequenceFile. (Chris Douglas via dhruba)
+
   OPTIMIZATIONS
 
     HADOOP-1898.  Release the lock protecting the last time of the last stack

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FsShell.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FsShell.java?rev=597211&r1=597210&r2=597211&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FsShell.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FsShell.java Wed Nov 21 13:14:33 2007
@@ -19,15 +19,23 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.text.DecimalFormat;
 import java.text.SimpleDateFormat;
 import java.util.*;
+import java.util.zip.GZIPInputStream;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
@@ -92,11 +100,7 @@
   /** 
    * Print from src to stdout.
    */
-  private void printToStdout(Path src) throws IOException {
-    if (fs.isDirectory(src)) {
-      throw new IOException("Source must be a file.");
-    }
-    FSDataInputStream in = fs.open(src);
+  private void printToStdout(InputStream in) throws IOException {
     IOUtils.copyBytes(in, System.out, getConf(), true);
   }
 
@@ -286,11 +290,81 @@
     new DelayedExceptionThrowing() {
       @Override
       void process(Path p) throws IOException {
-        printToStdout(p);
+        if (fs.isDirectory(p)) {
+          throw new IOException("Source must be a file.");
+        }
+        printToStdout(fs.open(p));
       }
     }.process(srcf);
   }
-    
+
+  private class TextRecordInputStream extends InputStream {
+    SequenceFile.Reader r;
+    WritableComparable key;
+    Writable val;
+
+    DataInputBuffer inbuf;
+    DataOutputBuffer outbuf;
+
+    public TextRecordInputStream(FileStatus f) throws IOException {
+      r = new SequenceFile.Reader(fs, f.getPath(), getConf());
+      key = (WritableComparable)ReflectionUtils.newInstance(
+          r.getKeyClass(), getConf());
+      val = (Writable)ReflectionUtils.newInstance(
+          r.getValueClass(), getConf());
+      inbuf = new DataInputBuffer();
+      outbuf = new DataOutputBuffer();
+    }
+
+    public int read() throws IOException {
+      int ret;
+      if (null == inbuf || -1 == (ret = inbuf.read())) {
+        if (!r.next(key, val)) {
+          return -1;
+        }
+        byte[] tmp = key.toString().getBytes();
+        outbuf.write(tmp, 0, tmp.length);
+        outbuf.write('\t');
+        tmp = val.toString().getBytes();
+        outbuf.write(tmp, 0, tmp.length);
+        outbuf.write('\n');
+        inbuf.reset(outbuf.getData(), outbuf.getLength());
+        outbuf.reset();
+        ret = inbuf.read();
+      }
+      return ret;
+    }
+  }
+
+  private InputStream forMagic(Path p) throws IOException {
+    FSDataInputStream i = fs.open(p);
+    switch(i.readShort()) {
+      case 0x1f8b: // RFC 1952
+        i.seek(0);
+        return new GZIPInputStream(i);
+      case 0x5345: // 'S' 'E'
+        if (i.readByte() == 'Q') {
+          i.close();
+          return new TextRecordInputStream(fs.getFileStatus(p));
+        }
+        break;
+    }
+    i.seek(0);
+    return i;
+  }
+
+  void text(String srcf) throws IOException {
+    new DelayedExceptionThrowing() {
+      @Override
+      void process(Path p) throws IOException {
+        if (fs.isDirectory(p)) {
+          throw new IOException("Source must be a file.");
+        }
+        printToStdout(forMagic(p));
+      }
+    }.process(srcf);
+  }
+
   /**
    * Parse the args of a command and check the format of args.
    */
@@ -961,7 +1035,7 @@
      "[-copyToLocal <src><localdst>] [-moveToLocal <src> <localdst>]\n\t" +
       "[-mkdir <path>] [-report] [" + SETREP_SHORT_USAGE + "]\n\t" +
      "[-touchz <path>] [-test -[ezd] <path>] [-stat [format] <path>]\n\t" +
-      "[-tail [-f] <path>]\n\t" +
+      "[-tail [-f] <path>] [-text <path>]\n\t" +
       "[-help [cmd]]\n";
 
     String conf ="-conf <configuration file>:  Specify an application configuration file.";
@@ -1039,6 +1113,10 @@
 
     String cat = "-cat <src>: \tFetch all files that match the file pattern <src> \n" +
       "\t\tand display their content on stdout.\n";
+
+    String text = "-text <path>: Attempt to decode contents if the first few bytes\n" +
+      "\t\tmatch a magic number associated with a known format\n" +
+      "\t\t(gzip, SequenceFile)\n";
         
     String copyToLocal = "-copyToLocal <src> <localdst>:  Identical to the -get command.\n";
 
@@ -1183,6 +1261,8 @@
           ls(argv[i], true);
         } else if ("-touchz".equals(cmd)) {
           touchz(argv[i]);
+        } else if ("-text".equals(cmd)) {
+          text(argv[i]);
         }
       } catch (RemoteException e) {
         //
@@ -1228,7 +1308,8 @@
     } else if ("-ls".equals(cmd) || "-lsr".equals(cmd) ||
                "-du".equals(cmd) || "-dus".equals(cmd) ||
                "-rm".equals(cmd) || "-rmr".equals(cmd) ||
-               "-touchz".equals(cmd) || "-mkdir".equals(cmd)) {
+               "-touchz".equals(cmd) || "-mkdir".equals(cmd) ||
+               "-text".equals(cmd)) {
       System.err.println("Usage: java FsShell" + 
                          " [" + cmd + " <path>]");
     } else if ("-mv".equals(cmd) || "-cp".equals(cmd)) {
@@ -1275,6 +1356,7 @@
       System.err.println("           [-get [-crc] <src> <localdst>]");
       System.err.println("           [-getmerge <src> <localdst> [addnl]]");
       System.err.println("           [-cat <src>]");
+      System.err.println("           [-text <src>]");
       System.err.println("           [-copyToLocal [-crc] <src> <localdst>]");
       System.err.println("           [-moveToLocal [-crc] <src> <localdst>]");
       System.err.println("           [-mkdir <path>]");
@@ -1325,7 +1407,8 @@
       }
     } else if ("-rm".equals(cmd) || "-rmr".equals(cmd) ||
                "-cat".equals(cmd) || "-mkdir".equals(cmd) ||
-               "-touchz".equals(cmd) || "-stat".equals(cmd)) {
+               "-touchz".equals(cmd) || "-stat".equals(cmd) ||
+               "-text".equals(cmd)) {
       if (argv.length < 2) {
         printUsage(cmd);
         return exitCode;
@@ -1358,6 +1441,8 @@
         else
           copyMergeToLocal(argv[i++], new Path(argv[i++]));
       } else if ("-cat".equals(cmd)) {
+        exitCode = doall(cmd, argv, getConf(), i);
+      } else if ("-text".equals(cmd)) {
         exitCode = doall(cmd, argv, getConf(), i);
       } else if ("-moveToLocal".equals(cmd)) {
         moveToLocal(argv[i++], new Path(argv[i++]));

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestDFSShell.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestDFSShell.java?rev=597211&r1=597210&r2=597211&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestDFSShell.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestDFSShell.java Wed Nov 21 13:14:33 2007
@@ -21,10 +21,12 @@
 import java.io.*;
 import java.security.*;
 import java.util.*;
+import java.util.zip.GZIPOutputStream;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.ToolRunner;
 
 /**
  * This class tests commands from DFSShell.
@@ -169,6 +171,50 @@
     } finally {
       try {dfs.close();} catch (Exception e) {}
       cluster.shutdown();
+    }
+  }
+
+  public void testText() throws Exception {
+    Configuration conf = new Configuration();
+    MiniDFSCluster cluster = null;
+    PrintStream bak = null;
+    try {
+      cluster = new MiniDFSCluster(conf, 2, true, null);
+      FileSystem fs = cluster.getFileSystem();
+      Path root = new Path("/texttest");
+      fs.mkdirs(root);
+      OutputStream zout = new GZIPOutputStream(
+          fs.create(new Path(root, "file.gz")));
+      Random r = new Random();
+      ByteArrayOutputStream file = new ByteArrayOutputStream();
+      for (int i = 0; i < 1024; ++i) {
+        char c = Character.forDigit(r.nextInt(26) + 10, 36);
+        file.write(c);
+        zout.write(c);
+      }
+      zout.close();
+
+      bak = System.out;
+      ByteArrayOutputStream out = new ByteArrayOutputStream();
+      System.setOut(new PrintStream(out));
+
+      String[] argv = new String[2];
+      argv[0] = "-text";
+      argv[1] = new Path(root, "file.gz").toUri().getPath();
+      int ret = ToolRunner.run(new FsShell(), argv);
+      assertTrue("-text returned -1", 0 >= ret);
+      file.reset();
+      out.reset();
+      assertTrue("Output doesn't match input",
+          Arrays.equals(file.toByteArray(), out.toByteArray()));
+
+    } finally {
+      if (null != bak) {
+        System.setOut(bak);
+      }
+      if (null != cluster) {
+        cluster.shutdown();
+      }
     }
   }
 



Mime
View raw message