hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zs...@apache.org
Subject svn commit: r930059 - in /hadoop/hive/trunk: ./ data/files/ ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/test/org/apache/hadoop/hive/ql/io/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Thu, 01 Apr 2010 18:24:14 GMT
Author: zshao
Date: Thu Apr  1 18:24:14 2010
New Revision: 930059

URL: http://svn.apache.org/viewvc?rev=930059&view=rev
Log:
HIVE-1272. Add SymlinkTextInputFormat to Hive. (Guanghao Shen via zshao)

Added:
    hadoop/hive/trunk/data/files/symlink1.txt
    hadoop/hive/trunk/data/files/symlink2.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/symlink_text_input_format.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/symlink_text_input_format.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=930059&r1=930058&r2=930059&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Apr  1 18:24:14 2010
@@ -63,6 +63,9 @@ Trunk -  Unreleased
     HIVE-1219. Robust handling of metastore connection failures
     (Paul Yang via namit)
 
+    HIVE-1272. Add SymlinkTextInputFormat to Hive.
+    (Guanghao Shen via zshao)
+
   IMPROVEMENTS
     HIVE-983. Function from_unixtime takes long.
     (Ning Zhang via zshao)

Added: hadoop/hive/trunk/data/files/symlink1.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/symlink1.txt?rev=930059&view=auto
==============================================================================
--- hadoop/hive/trunk/data/files/symlink1.txt (added)
+++ hadoop/hive/trunk/data/files/symlink1.txt Thu Apr  1 18:24:14 2010
@@ -0,0 +1,2 @@
+../data/files/T1.txt
+../data/files/T3.txt

Added: hadoop/hive/trunk/data/files/symlink2.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/symlink2.txt?rev=930059&view=auto
==============================================================================
--- hadoop/hive/trunk/data/files/symlink2.txt (added)
+++ hadoop/hive/trunk/data/files/symlink2.txt Thu Apr  1 18:24:14 2010
@@ -0,0 +1 @@
+../data/files/T2.txt

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=930059&r1=930058&r2=930059&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
(original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Thu Apr  1 18:24:14 2010
@@ -29,6 +29,8 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -39,15 +41,13 @@ import org.apache.hadoop.hive.shims.Hado
 import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
 
 
 /**
@@ -224,8 +224,9 @@ public class CombineHiveInputFormat<K ex
     CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
         .getCombineFileInputFormat();
 
-    if (combine == null)
+    if (combine == null) {
       return super.getSplits(job, numSplits);
+    }
 
     if (combine.getInputPathsShim(job).length == 0) {
       throw new IOException("No input paths specified in job");
@@ -271,6 +272,10 @@ public class CombineHiveInputFormat<K ex
         return super.getSplits(job, numSplits);
       }
 
+      if (inputFormat instanceof SymlinkTextInputFormat) {
+        return super.getSplits(job, numSplits);
+      }
+
       combine.createPool(job, new CombineFilter(path));
     }
 

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java?rev=930059&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java
(added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SymlinkTextInputFormat.java
Thu Apr  1 18:24:14 2010
@@ -0,0 +1,184 @@
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.BufferedReader;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+
+/**
+ * Symlink file is a text file which contains a list of filename / dirname.
+ * This input method reads symlink files from specified job input paths and
+ * takes the files / directories specified in those symlink files as
+ * actual map-reduce input. The target input data should be in TextInputFormat.
+ */
+@SuppressWarnings("deprecation")
+public class SymlinkTextInputFormat
+    implements InputFormat<LongWritable, Text>, JobConfigurable {
+  /**
+   * This input split wraps the FileSplit generated from
+   * TextInputFormat.getSplits(), while setting the original link file path
+   * as job input path. This is needed because MapOperator relies on the
+   * job input path to lookup correct child operators. The target data file
+   * is encapsulated in the wrapped FileSplit.
+   */
+  public static class SymlinkTextInputSplit extends FileSplit {
+    private final FileSplit split;
+
+    public SymlinkTextInputSplit() {
+      super((Path)null, 0, 0, (String[])null);
+      split = new FileSplit((Path)null, 0, 0, (String[])null);
+    }
+
+    public SymlinkTextInputSplit(Path symlinkPath, FileSplit split) throws IOException {
+      super(symlinkPath, 0, 0, split.getLocations());
+      this.split = split;
+    }
+
+    /**
+     * Gets the target split, i.e. the split of target data.
+     */
+    public FileSplit getTargetSplit() {
+      return split;
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+      super.write(out);
+      split.write(out);
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      super.readFields(in);
+      split.readFields(in);
+    }
+  }
+
+  @Override
+  public RecordReader<LongWritable, Text> getRecordReader(
+      InputSplit split, JobConf job, Reporter reporter) throws IOException {
+    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();
+
+    // The target data is in TextInputFormat.
+    TextInputFormat inputFormat = new TextInputFormat();
+    inputFormat.configure(job);
+    return inputFormat.getRecordReader(targetSplit, job, reporter);
+  }
+
+  /**
+   * Parses all target paths from job input directory which contains symlink
+   * files, and splits the target data using TextInputFormat.
+   */
+  @Override
+  public InputSplit[] getSplits(JobConf job, int numSplits)
+      throws IOException {
+    Path[] symlinksDirs = FileInputFormat.getInputPaths(job);
+    if (symlinksDirs.length == 0) {
+      throw new IOException("No input paths specified in job.");
+    }
+
+    // Get all target paths first, because the number of total target paths
+    // is used to determine number of splits of each target path.
+    List<Path> targetPaths = new ArrayList<Path>();
+    List<Path> symlinkPaths = new ArrayList<Path>();
+    try {
+      getTargetPathsFromSymlinksDirs(
+          job,
+          symlinksDirs,
+          targetPaths,
+          symlinkPaths);
+    } catch (Exception e) {
+      throw new IOException(
+          "Error parsing symlinks from specified job input path.", e);
+    }
+    if (targetPaths.size() == 0) {
+      return new InputSplit[0];
+    }
+
+    // The input should be in TextInputFormat.
+    TextInputFormat inputFormat = new TextInputFormat();
+    JobConf newjob = new JobConf(job);
+    newjob.setInputFormat(TextInputFormat.class);
+    inputFormat.configure(newjob);
+
+    List<InputSplit> result = new ArrayList<InputSplit>();
+
+    // ceil(numSplits / numPaths), so we can get at least numSplits splits.
+    int numPaths = targetPaths.size();
+    int numSubSplits = (numSplits + numPaths - 1) / numPaths;
+
+    // For each path, do getSplits().
+    for (int i = 0; i < numPaths; ++i) {
+      Path targetPath = targetPaths.get(i);
+      Path symlinkPath = symlinkPaths.get(i);
+
+      FileInputFormat.setInputPaths(newjob, targetPath);
+
+      InputSplit[] iss = inputFormat.getSplits(newjob, numSubSplits);
+      for (InputSplit is : iss) {
+        result.add(new SymlinkTextInputSplit(symlinkPath, (FileSplit)is));
+      }
+    }
+    return result.toArray(new InputSplit[result.size()]);
+  }
+
+  @Override
+  public void configure(JobConf job) {
+    // empty
+  }
+
+  /**
+   * Given list of directories containing symlink files, read all target
+   * paths from symlink files and return as targetPaths list. And for each
+   * targetPaths[i], symlinkPaths[i] will be the path to the symlink file
+   * containing the target path.
+   */
+  private static void getTargetPathsFromSymlinksDirs(
+      Configuration conf, Path[] symlinksDirs,
+      List<Path> targetPaths, List<Path> symlinkPaths) throws IOException {
+    for (Path symlinkDir : symlinksDirs) {
+      FileSystem fileSystem = symlinkDir.getFileSystem(conf);
+      FileStatus[] symlinks = fileSystem.listStatus(symlinkDir);
+
+      // Read paths from each symlink file.
+      for (FileStatus symlink : symlinks) {
+        BufferedReader reader =
+            new BufferedReader(
+                new InputStreamReader(
+                    fileSystem.open(symlink.getPath())));
+
+        String line;
+        while ((line = reader.readLine()) != null) {
+          targetPaths.add(new Path(line));
+          symlinkPaths.add(symlink.getPath());
+        }
+      }
+    }
+  }
+
+  /**
+   * For backward compatibility with hadoop 0.17.
+   */
+  public void validateInput(JobConf job) throws IOException {
+    // do nothing
+  }
+}

Added: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java?rev=930059&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
(added)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
Thu Apr  1 18:24:14 2010
@@ -0,0 +1,196 @@
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Unittest for SymlinkTextInputFormat.
+ */
+@SuppressWarnings("deprecation")
+public class TestSymlinkTextInputFormat extends TestCase {
+  private static Log log =
+      LogFactory.getLog(TestSymlinkTextInputFormat.class);
+
+  private Configuration conf;
+  private JobConf job;
+  private FileSystem fileSystem;
+  private Path testDir;
+  Reporter reporter;
+
+  private Path dataDir1;
+  private Path dataDir2;
+  private Path symlinkDir;
+
+  @Override
+  protected void setUp() throws IOException {
+    conf = new Configuration();
+    job = new JobConf(conf);
+    fileSystem = FileSystem.getLocal(conf);
+    testDir = new Path(System.getProperty("test.data.dir", ".") +
+                       "/TestSymlinkTextInputFormat");
+    reporter = Reporter.NULL;
+    fileSystem.delete(testDir, true);
+
+    dataDir1 = new Path(testDir, "datadir1");
+    dataDir2 = new Path(testDir, "datadir2");
+    symlinkDir = new Path(testDir, "symlinkdir");
+  }
+
+  @Override
+  protected void tearDown() throws IOException {
+    fileSystem.delete(testDir, true);
+  }
+
+  /**
+   * Test scenario: Two data directories, one symlink file that contains two
+   * paths each point to a file in one of data directories.
+   */
+  public void testAccuracy1() throws IOException {
+    // First data dir, contains 2 files.
+    writeTextFile(new Path(dataDir1, "file1"),
+                  "dir1_file1_line1\n" +
+                  "dir1_file1_line2\n");
+    writeTextFile(new Path(dataDir1, "file2"),
+                  "dir1_file2_line1\n" +
+                  "dir1_file2_line2\n");
+
+    // Second data dir, contains 2 files.
+    writeTextFile(new Path(dataDir2, "file1"),
+                  "dir2_file1_line1\n" +
+                  "dir2_file1_line2\n");
+    writeTextFile(new Path(dataDir2, "file2"),
+                  "dir2_file2_line1\n" +
+                  "dir2_file2_line2\n");
+
+    // A symlink file, contains first file from first dir and second file from
+    // second dir.
+    writeSymlinkFile(
+        new Path(symlinkDir, "symlink_file"),
+        new Path(dataDir1, "file1"),
+        new Path(dataDir2, "file2"));
+
+    FileInputFormat.setInputPaths(job, symlinkDir);
+
+    SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
+    InputSplit[] splits = inputFormat.getSplits(job, 2);
+
+    log.info("Number of splits: " + splits.length);
+
+    // Read all values.
+    List<String> received = new ArrayList<String>();
+    for (InputSplit split : splits) {
+      RecordReader<LongWritable, Text> reader =
+          inputFormat.getRecordReader(split, job, reporter);
+
+      LongWritable key = reader.createKey();
+      Text value = reader.createValue();
+      while (reader.next(key, value)) {
+        received.add(value.toString());
+      }
+    }
+
+    List<String> expected = new ArrayList<String>();
+    expected.add("dir1_file1_line1");
+    expected.add("dir1_file1_line2");
+    expected.add("dir2_file2_line1");
+    expected.add("dir2_file2_line2");
+
+    assertEquals(expected, received);
+  }
+
+  /**
+   * Scenario: Empty input directory, i.e. no symlink file.
+   *
+   * Expected: Should return empty result set without any exception.
+   */
+  public void testAccuracy2() throws IOException {
+    fileSystem.mkdirs(symlinkDir);
+
+    FileInputFormat.setInputPaths(job, symlinkDir);
+
+    SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
+    InputSplit[] splits = inputFormat.getSplits(job, 2);
+
+    log.info("Number of splits: " + splits.length);
+
+    // Read all values.
+    List<String> received = new ArrayList<String>();
+    for (InputSplit split : splits) {
+      RecordReader<LongWritable, Text> reader =
+          inputFormat.getRecordReader(split, job, reporter);
+
+      LongWritable key = reader.createKey();
+      Text value = reader.createValue();
+      while (reader.next(key, value)) {
+        received.add(value.toString());
+      }
+    }
+
+    List<String> expected = new ArrayList<String>();
+
+    assertEquals(expected, received);
+  }
+
+  /**
+   * Scenario: No job input paths.
+   * Expected: IOException with proper message.
+   */
+  public void testFailure() {
+    SymlinkTextInputFormat inputFormat = new SymlinkTextInputFormat();
+
+    try {
+      inputFormat.getSplits(job, 2);
+      fail("IOException expected if no job input paths specified.");
+    } catch (IOException e) {
+      assertEquals("Incorrect exception message for no job input paths error.",
+                   "No input paths specified in job.",
+                   e.getMessage());
+    }
+  }
+
+  /**
+   * Writes the given string to the given file.
+   */
+  private void writeTextFile(Path file, String content) throws IOException {
+    OutputStreamWriter writer = new OutputStreamWriter(fileSystem.create(file));
+    writer.write(content);
+    writer.close();
+  }
+
+  /**
+   * Writes a symlink file that contains given list of paths.
+   *
+   * @param symlinkFile
+   * The symlink file to write.
+   *
+   * @param paths
+   * The list of paths to write to the symlink file.
+   */
+  private void writeSymlinkFile(Path symlinkFile, Path...paths)
+      throws IOException {
+    OutputStreamWriter writer =
+        new OutputStreamWriter(fileSystem.create(symlinkFile));
+    for (Path path : paths) {
+      writer.write(path.toString());
+      writer.write("\n");
+    }
+    writer.close();
+  }
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/symlink_text_input_format.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/symlink_text_input_format.q?rev=930059&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/symlink_text_input_format.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/symlink_text_input_format.q Thu Apr
 1 18:24:14 2010
@@ -0,0 +1,24 @@
+DROP TABLE symlink_text_input_format;
+
+EXPLAIN
+CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat';
+
+CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat';
+
+dfs -cp ../data/files/symlink1.txt ../build/ql/test/data/warehouse/symlink_text_input_format/symlink1.txt;
+dfs -cp ../data/files/symlink2.txt ../build/ql/test/data/warehouse/symlink_text_input_format/symlink2.txt;
+
+EXPLAIN SELECT * FROM symlink_text_input_format;
+
+SELECT * FROM symlink_text_input_format;
+
+EXPLAIN SELECT value FROM symlink_text_input_format;
+
+SELECT value FROM symlink_text_input_format;
+
+EXPLAIN SELECT count(1) FROM symlink_text_input_format;
+
+SELECT count(1) FROM symlink_text_input_format;
+
+DROP TABLE symlink_text_input_format;
+

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/symlink_text_input_format.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/symlink_text_input_format.q.out?rev=930059&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/symlink_text_input_format.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/symlink_text_input_format.q.out Thu
Apr  1 18:24:14 2010
@@ -0,0 +1,202 @@
+PREHOOK: query: DROP TABLE symlink_text_input_format
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE symlink_text_input_format
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: EXPLAIN
+CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: EXPLAIN
+CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+ABSTRACT SYNTAX TREE:
+  (TOK_CREATETABLE symlink_text_input_format TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key
TOK_STRING) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat'
'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'))
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+      Create Table Operator:
+        Create Table
+          columns: key string, value string
+          if not exists: false
+          input format: org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat
+          # buckets: -1
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          name: symlink_text_input_format
+          isExternal: false
+
+
+PREHOOK: query: CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED
AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED
AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@symlink_text_input_format
+PREHOOK: query: EXPLAIN SELECT * FROM symlink_text_input_format
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF symlink_text_input_format)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT * FROM symlink_text_input_format
+PREHOOK: type: QUERY
+PREHOOK: Input: default@symlink_text_input_format
+PREHOOK: Output: file:/data/users/guanghao/w1/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-03-31_16-09-55_412_372868623647124211/10000
+POSTHOOK: query: SELECT * FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@symlink_text_input_format
+POSTHOOK: Output: file:/data/users/guanghao/w1/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-03-31_16-09-55_412_372868623647124211/10000
+1	11
+2	12
+3	13
+7	17
+8	18
+8	28
+2	12
+4	14
+6	16
+7	17
+2	22
+3	13
+4	14
+5	15
+8	18
+8	18
+PREHOOK: query: EXPLAIN SELECT value FROM symlink_text_input_format
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT value FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF symlink_text_input_format)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        symlink_text_input_format 
+          TableScan
+            alias: symlink_text_input_format
+            Select Operator
+              expressions:
+                    expr: value
+                    type: string
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT value FROM symlink_text_input_format
+PREHOOK: type: QUERY
+PREHOOK: Input: default@symlink_text_input_format
+PREHOOK: Output: file:/data/users/guanghao/w1/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-03-31_16-09-55_939_1571446349980486687/10000
+POSTHOOK: query: SELECT value FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@symlink_text_input_format
+POSTHOOK: Output: file:/data/users/guanghao/w1/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-03-31_16-09-55_939_1571446349980486687/10000
+11
+12
+13
+17
+18
+28
+12
+14
+16
+17
+22
+13
+14
+15
+18
+18
+PREHOOK: query: EXPLAIN SELECT count(1) FROM symlink_text_input_format
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT count(1) FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF symlink_text_input_format)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        symlink_text_input_format 
+          TableScan
+            alias: symlink_text_input_format
+            Select Operator
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                mode: hash
+                outputColumnNames: _col0
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: _col0
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          mode: mergepartial
+          outputColumnNames: _col0
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT count(1) FROM symlink_text_input_format
+PREHOOK: type: QUERY
+PREHOOK: Input: default@symlink_text_input_format
+PREHOOK: Output: file:/data/users/guanghao/w1/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-03-31_16-09-59_427_4904352844938482139/10000
+POSTHOOK: query: SELECT count(1) FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@symlink_text_input_format
+POSTHOOK: Output: file:/data/users/guanghao/w1/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-03-31_16-09-59_427_4904352844938482139/10000
+16
+PREHOOK: query: DROP TABLE symlink_text_input_format
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE symlink_text_input_format
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@symlink_text_input_format



Mime
View raw message