hadoop-hive-commits mailing list archives

From: nzh...@apache.org
Subject: svn commit: r965708 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/test/org/apache/hadoop/hive/ql/exec/ ql/src/test/results/clientpositive/
Date: Tue, 20 Jul 2010 01:24:29 GMT
Author: nzhang
Date: Tue Jul 20 01:24:29 2010
New Revision: 965708

URL: http://svn.apache.org/viewvc?rev=965708&view=rev
Log:
HIVE-1463. hive output file names are unnecessarily large
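
In short: per-task output files were previously named after the full Hadoop attempt id (e.g. attempt_local_0001_r_000000_0); after this patch they carry only the zero-padded task id and attempt number (e.g. 000000_0), as the sample10.q.out diff below shows.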

Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/build-common.xml
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Jul 20 01:24:29 2010
@@ -35,6 +35,9 @@ Trunk -  Unreleased
     Operator.forward()
     (Yongqiang He via Ning Zhang)
 
+    HIVE-1463. Hive output file names are unnecessarily large
+    (Joydeep Sen Sarma via Ning Zhang)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/build-common.xml (original)
+++ hadoop/hive/trunk/build-common.xml Tue Jul 20 01:24:29 2010
@@ -407,6 +407,7 @@
       <jvmarg value="-Xdebug"/>
       <jvmarg value="-Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y"/>
-->
       <env key="HADOOP_HOME" value="${hadoop.root}"/>
+      <env key="HADOOP_CLASSPATH" value="${test.data.dir}/conf"/>
       <env key="TZ" value="US/Pacific"/>
       <sysproperty key="test.output.overwrite" value="${overwrite}"/>
       <sysproperty key="test.service.standalone.server" value="${standalone}"/>
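
The new HADOOP_CLASSPATH entry presumably exists so that any hadoop process forked while the tests run picks up the generated test configuration: the stock bin/hadoop launcher adds $HADOOP_CLASSPATH to the classpath it assembles, which makes ${test.data.dir}/conf visible to child JVMs as well.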

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Jul 20 01:24:29 2010
@@ -278,8 +278,8 @@ public final class Utilities {
 
       if(!HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJT).equals("local")) {
         // use the default file system of the job
-        FileSystem fs = FileSystem.get(job);
         Path planPath = new Path(hiveScratchDir, "plan." + randGen.nextInt());
+        FileSystem fs = planPath.getFileSystem(job);
         FSDataOutputStream out = fs.create(planPath);
         serializeMapRedWork(w, out);
         HiveConf.setVar(job, HiveConf.ConfVars.PLAN, planPath.toString());
@@ -467,9 +467,16 @@ public final class Utilities {
   public static String getTaskId(Configuration hconf) {
     String taskid = (hconf == null) ? null : hconf.get("mapred.task.id");
     if ((taskid == null) || taskid.equals("")) {
-      return ("" + randGen.nextInt());
+      return ("" + Math.abs(randGen.nextInt()));
     } else {
-      return taskid.replaceAll("task_[0-9]+_", "");
+       /* extract the task and attempt id from the hadoop taskid.
+          in version 17 the leading component was 'task_'. thereafter
+          the leading component is 'attempt_'. in 17 - hadoop also 
+          seems to have used _map_ and _reduce_ to denote map/reduce
+          task types
+       */
+      String ret = taskid.replaceAll(".*_[mr]_", "").replaceAll(".*_(map|reduce)_", "");
+      return (ret);
     }
   }
 
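A minimal sketch of what the two chained replaceAll calls above do, using attempt ids from the test cases added to TestOperators.java further down (the snippet is illustrative only, not part of the patch):

    // post-0.17 ids carry a _m_/_r_ marker, stripped by the first pattern:
    "attempt_200707121733_0003_m_000005_0"
        .replaceAll(".*_[mr]_", "")
        .replaceAll(".*_(map|reduce)_", "");   // -> "000005_0"

    // 0.17-era local ids spell out _map_/_reduce_, handled by the second:
    "job_local_0001_map_000005"
        .replaceAll(".*_[mr]_", "")
        .replaceAll(".*_(map|reduce)_", "");   // -> "000005"
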
@@ -958,31 +965,28 @@ public final class Utilities {
 
   /**
    * The first group will contain the task id. The second group is the optional
-   * extension. The file name looks like: "24931_r_000000_0" or
-   * "24931_r_000000_0.gz"
+   * extension. The file name looks like: "0_0" or "0_0.gz". There may be a leading
+   * prefix (tmp_). Since getTaskId() can return an integer only - this should match
+   * a pure integer as well
    */
-  private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*_([0-9]*)_[0-9](\\..*)?$");
+  private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*?([0-9]+)(_[0-9])?(\\..*)?$");
 
   /**
-   * Local job name looks like "job_local_1_map_0000", where 1 is job ID and 0000 is task ID.
-   */
-  private static Pattern fileNameLocalTaskIdRegex = Pattern.compile(".*local.*_([0-9]*)$");
-
-  /**
-   * Get the task id from the filename. E.g., get "000000" out of
-   * "24931_r_000000_0" or "24931_r_000000_0.gz"
+   * Get the task id from the filename.
+   * It is assumed that the filename is derived from the output of getTaskId
+   *
+   * @param filename filename to extract taskid from
    */
   public static String getTaskIdFromFilename(String filename) {
     String taskId = filename;
-    Matcher m = fileNameTaskIdRegex.matcher(filename);
+    int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
+    if (dirEnd != -1)
+      taskId = filename.substring(dirEnd + 1);
+
+    Matcher m = fileNameTaskIdRegex.matcher(taskId);
     if (!m.matches()) {
-      Matcher m2 = fileNameLocalTaskIdRegex.matcher(filename);
-      if (!m2.matches()) {
-        LOG.warn("Unable to get task id from file name: " + filename
-            + ". Using full filename as task id.");
-      } else {
-        taskId = m2.group(1);
-      }
+      LOG.warn("Unable to get task id from file name: " + filename
+               + ". Using last component" + taskId + " as task id.");
     } else {
       taskId = m.group(1);
     }
@@ -991,17 +995,21 @@ public final class Utilities {
   }
 
   /**
-   * Replace the task id from the filename. E.g., replace "000000" out of
-   * "24931_r_000000_0" or "24931_r_000000_0.gz" by 33 to
-   * "24931_r_000033_0" or "24931_r_000033_0.gz"
+   * Replace the task id from the filename.
+   * It is assumed that the filename is derived from the output of getTaskId
+   *
+   * @param filename filename to replace taskid
+   * "0_0" or "0_0.gz" by 33 to
+   * "33_0" or "33_0.gz"
    */
   public static String replaceTaskIdFromFilename(String filename, int bucketNum) {
     String taskId = getTaskIdFromFilename(filename);
     String newTaskId = replaceTaskId(taskId, bucketNum);
-    return replaceTaskIdFromFilename(filename, taskId, newTaskId);
+    String ret =  replaceTaskIdFromFilename(filename, taskId, newTaskId);
+    return (ret);
   }
 
-  public static String replaceTaskId(String taskId, int bucketNum) {
+  private static String replaceTaskId(String taskId, int bucketNum) {
     String strBucketNum = String.valueOf(bucketNum);
     int bucketNumLen = strBucketNum.length();
     int taskIdLen = taskId.length();
@@ -1020,7 +1028,7 @@ public final class Utilities {
    * @param newTaskId
    * @return
    */
-  public static String replaceTaskIdFromFilename(String filename,
+  private static String replaceTaskIdFromFilename(String filename,
       String oldTaskId, String newTaskId) {
 
     String[] spl = filename.split(oldTaskId);
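
With this hunk, replaceTaskId and the three-argument replaceTaskIdFromFilename become private, leaving getTaskIdFromFilename and the two-argument replaceTaskIdFromFilename as the public surface. A small usage sketch follows; the expected values are inferred from the new regex and the padding logic in replaceTaskId, and are not asserted by the patch itself:

    Utilities.getTaskIdFromFilename("000005_0.gz");            // -> "000005"
    Utilities.getTaskIdFromFilename("/warehouse/t/000005_0");  // -> "000005" (directory part stripped first)
    Utilities.replaceTaskIdFromFilename("000005_0.gz", 33);    // -> "000033_0.gz" (bucket number padded to the task id's width)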

Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Tue Jul 20 01:24:29 2010
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.util.ArrayList;
 import java.util.LinkedList;
 
@@ -50,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.Pl
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.ScriptDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.TextInputFormat;
@@ -433,33 +433,13 @@ public class TestExecDriver extends Test
     mr.setReducer(op5);
   }
 
-  private File generatePlanFile() throws Exception {
-    File scratchDir = new File((new HiveConf(TestExecDriver.class))
-        .getVar(ConfVars.SCRATCHDIR));
-    File planFile = File.createTempFile("plan", ".xml", scratchDir);
-    System.out.println("Generating plan file " + planFile.toString());
-    FileOutputStream out = new FileOutputStream(planFile);
-    Utilities.serializeMapRedWork(mr, out);
-    return planFile;
-  }
-
-  private void executePlan(File planFile) throws Exception {
+  private void executePlan() throws Exception {
     String testName = new Exception().getStackTrace()[1].getMethodName();
-    String cmdLine = conf.getVar(HiveConf.ConfVars.HADOOPBIN) + " jar "
-        + conf.getJar() + " org.apache.hadoop.hive.ql.exec.ExecDriver -plan "
-        + planFile.toString() + " " + ExecDriver.generateCmdLine(conf);
-    System.out.println("Executing: " + cmdLine);
-    Process executor = Runtime.getRuntime().exec(cmdLine);
-
-    StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(),
-        null, System.out);
-    StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(),
-        null, System.err);
-
-    outPrinter.start();
-    errPrinter.start();
-
-    int exitVal = executor.waitFor();
+    MapRedTask mrtask = new MapRedTask();
+    DriverContext dctx = new DriverContext ();
+    mrtask.setWork(mr);
+    mrtask.initialize(conf, null, dctx);
+    int exitVal =  mrtask.execute(dctx);
 
     if (exitVal != 0) {
       System.out.println(testName + " execution failed with exit status: "
@@ -475,8 +455,7 @@ public class TestExecDriver extends Test
 
     try {
       populateMapPlan1(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.txt.deflate", "mapplan1.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -490,8 +469,7 @@ public class TestExecDriver extends Test
 
     try {
       populateMapPlan2(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.txt", "mapplan2.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -506,8 +484,7 @@ public class TestExecDriver extends Test
     try {
       populateMapRedPlan1(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
           "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1.val.sorted.txt", "mapredplan1.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -522,8 +499,7 @@ public class TestExecDriver extends Test
     try {
       populateMapRedPlan2(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
           "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.sorted.txt", "mapredplan2.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -538,8 +514,7 @@ public class TestExecDriver extends Test
     try {
       populateMapRedPlan3(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
           "src"), db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src2"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1kv2.cogroup.txt", "mapredplan3.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -554,8 +529,7 @@ public class TestExecDriver extends Test
     try {
       populateMapRedPlan4(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
           "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1.string-sorted.txt", "mapredplan4.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -570,8 +544,7 @@ public class TestExecDriver extends Test
     try {
       populateMapRedPlan5(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
           "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("kv1.string-sorted.txt", "mapredplan5.out");
     } catch (Throwable e) {
       e.printStackTrace();
@@ -586,8 +559,7 @@ public class TestExecDriver extends Test
     try {
       populateMapRedPlan6(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
           "src"));
-      File planFile = generatePlanFile();
-      executePlan(planFile);
+      executePlan();
       fileDiff("lt100.sorted.txt", "mapredplan6.out");
     } catch (Throwable e) {
       e.printStackTrace();

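Presumably the executePlan rewrite has two payoffs: MapRedTask drives the plan in-process, so the per-test plan file and the hand-assembled bin/hadoop command line are no longer needed, and the HADOOP_CLASSPATH export added in build-common.xml covers the case where local-mode execution still forks a hadoop child process.
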
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Tue Jul 20 01:24:29 2010
@@ -26,6 +26,7 @@ import java.util.Map;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -137,48 +138,47 @@ public class TestOperators extends TestC
     }
   }
 
-  public void testFileSinkOperator() throws Throwable {
-    try {
-      System.out.println("Testing FileSink Operator");
-      // col1
-      ExprNodeDesc exprDesc1 = TestExecDriver.getStringColumn("col1");
-
-      // col2
-      ExprNodeDesc expr1 = TestExecDriver.getStringColumn("col0");
-      ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
-      ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor
-          .getFuncExprNodeDesc("concat", expr1, expr2);
-
-      // select operator to project these two columns
-      ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
-      earr.add(exprDesc1);
-      earr.add(exprDesc2);
-      ArrayList<String> outputCols = new ArrayList<String>();
-      for (int i = 0; i < earr.size(); i++) {
-        outputCols.add("_col" + i);
-      }
-      SelectDesc selectCtx = new SelectDesc(earr, outputCols);
-      Operator<SelectDesc> op = OperatorFactory.get(SelectDesc.class);
-      op.setConf(selectCtx);
-
-      // fileSinkOperator to dump the output of the select
-      // fileSinkDesc fsd = new fileSinkDesc ("file:///tmp" + File.separator +
-      // System.getProperty("user.name") + File.separator +
-      // "TestFileSinkOperator",
-      // Utilities.defaultTd, false);
-      // Operator<fileSinkDesc> flop = OperatorFactory.getAndMakeChild(fsd, op);
+  private void testTaskIds(String [] taskIds, String expectedAttemptId, String expectedTaskId) {
+    Configuration conf = new JobConf(TestOperators.class);
+    for (String one: taskIds) {
+      conf.set("mapred.task.id", one);
+      String attemptId = Utilities.getTaskId(conf);
+      assertEquals(expectedAttemptId, attemptId);
+      assertEquals(Utilities.getTaskIdFromFilename(attemptId), expectedTaskId);
+      assertEquals(Utilities.getTaskIdFromFilename(attemptId + ".gz"), expectedTaskId);
+      assertEquals(Utilities.getTaskIdFromFilename
+                   (Utilities.toTempPath(new Path(attemptId + ".gz")).toString()), expectedTaskId);
+    }
+  }
 
-      op.initialize(new JobConf(TestOperators.class),
-          new ObjectInspector[] {r[0].oi});
+  /**
+   * More stuff needs to be added here. Currently it only checks some basic
+   * file naming libraries
+   * The old test was deactivated as part of hive-405
+   */
+  public void testFileSinkOperator() throws Throwable {
 
-      // evaluate on row
-      for (int i = 0; i < 5; i++) {
-        op.process(r[i].o, 0);
-      }
-      op.close(false);
+    try {
+      testTaskIds (new String [] {
+          "attempt_200707121733_0003_m_000005_0",
+          "attempt_local_0001_m_000005_0",
+          "task_200709221812_0001_m_000005_0",
+          "task_local_0001_m_000005_0"
+        }, "000005_0", "000005");
+
+      testTaskIds (new String [] {
+          "job_local_0001_map_000005",
+          "job_local_0001_reduce_000005",
+        }, "000005", "000005");
+
+      testTaskIds (new String [] {"1234567"},
+                   "1234567", "1234567");
+
+      assertEquals(Utilities.getTaskIdFromFilename
+                   ("/mnt/dev005/task_local_0001_m_000005_0"),
+                   "000005");
 
       System.out.println("FileSink Operator ok");
-
     } catch (Throwable e) {
       e.printStackTrace();
       throw e;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out Tue Jul 20 01:24:29 2010
@@ -102,14 +102,14 @@ STAGE PLANS:
                               type: bigint
       Needs Tagging: false
       Path -> Alias:
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/attempt_local_0001_r_000000_0 [srcpartbucket]
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/attempt_local_0001_r_000000_0 [srcpartbucket]
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/attempt_local_0001_r_000000_0 [srcpartbucket]
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/attempt_local_0001_r_000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/000000_0 [srcpartbucket]
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/000000_0 [srcpartbucket]
       Path -> Partition:
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -122,13 +122,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
               name srcpartbucket
               partition_columns ds/hr
               serialization.ddl struct srcpartbucket { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
               input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -140,19 +140,19 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+                location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
                 name srcpartbucket
                 partition_columns ds/hr
                 serialization.ddl struct srcpartbucket { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                transient_lastDdlTime 1277145923
+                transient_lastDdlTime 1279517872
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: srcpartbucket
             name: srcpartbucket
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -165,13 +165,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
               name srcpartbucket
               partition_columns ds/hr
               serialization.ddl struct srcpartbucket { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
               input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -183,19 +183,19 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+                location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
                 name srcpartbucket
                 partition_columns ds/hr
                 serialization.ddl struct srcpartbucket { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                transient_lastDdlTime 1277145923
+                transient_lastDdlTime 1279517872
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: srcpartbucket
             name: srcpartbucket
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -208,13 +208,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
               name srcpartbucket
               partition_columns ds/hr
               serialization.ddl struct srcpartbucket { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
               input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -226,19 +226,19 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+                location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
                 name srcpartbucket
                 partition_columns ds/hr
                 serialization.ddl struct srcpartbucket { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                transient_lastDdlTime 1277145923
+                transient_lastDdlTime 1279517872
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: srcpartbucket
             name: srcpartbucket
-        file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/attempt_local_0001_r_000000_0
+        file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/000000_0
           Partition
-            base file name: attempt_local_0001_r_000000_0
+            base file name: 000000_0
             input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
             output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
             partition values:
@@ -251,13 +251,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-              location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+              location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
               name srcpartbucket
               partition_columns ds/hr
               serialization.ddl struct srcpartbucket { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              transient_lastDdlTime 1277145923
+              transient_lastDdlTime 1279517872
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
               input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -269,13 +269,13 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+                location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
                 name srcpartbucket
                 partition_columns ds/hr
                 serialization.ddl struct srcpartbucket { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                transient_lastDdlTime 1277145923
+                transient_lastDdlTime 1279517872
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: srcpartbucket
             name: srcpartbucket
@@ -299,7 +299,7 @@ STAGE PLANS:
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/data/users/nzhang/work/999/apache-hive/build/ql/scratchdir/hive_2010-06-21_11-45-29_813_4438091765019255104/10001
+              directory: file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/scratchdir/hive_2010-07-18_22-37-59_060_3045357226899710132/10001
               NumFilesPerFileSink: 1
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
@@ -322,14 +322,14 @@ PREHOOK: Input: default@srcpartbucket@ds
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-30_185_5515955290012905688/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-37-59_639_2719302052990534208/10000
 POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-30_185_5515955290012905688/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-37-59_639_2719302052990534208/10000
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
@@ -346,14 +346,14 @@ PREHOOK: Input: default@srcpartbucket@ds
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-35_252_5650802559039809329/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-03_708_4461107599597555917/10000
 POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-35_252_5650802559039809329/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-03_708_4461107599597555917/10000
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
@@ -370,14 +370,14 @@ PREHOOK: Input: default@srcpartbucket@ds
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-40_176_6924299231993417982/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-07_920_2071428667805216792/10000
 POSTHOOK: query: select * from srcpartbucket where ds is not null
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-40_176_6924299231993417982/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-07_920_2071428667805216792/10000
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]


