hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zs...@apache.org
Subject svn commit: r751583 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/test/queries/clientpositive/union3.q ql/src/test/results/clientpositive/union3.q.out
Date Mon, 09 Mar 2009 05:09:23 GMT
Author: zshao
Date: Mon Mar  9 05:09:22 2009
New Revision: 751583

URL: http://svn.apache.org/viewvc?rev=751583&view=rev
Log:
HIVE-308. UNION ALL: FileSinkOperator now adds files in case the target exists. (zshao)

Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union3.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=751583&r1=751582&r2=751583&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Mar  9 05:09:22 2009
@@ -142,6 +142,9 @@
 
   BUG FIXES
 
+    HIVE-308. UNION ALL: FileSinkOperator now adds files in case the target
+    exists. (zshao)
+
     HIVE-276. Fix input3_limit.q for hadoop 0.17. (zshao)
 
     HIVE-131. Remove uncommitted files from failed tasks.

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=751583&r1=751582&r2=751583&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Mon Mar  9 05:09:22 2009
@@ -225,10 +225,8 @@
             // Step1: rename tmp output folder to final path. After this point, 
             // updates from speculative tasks still writing to tmpPath will not 
             // appear in finalPath
-            LOG.info("Renaming tmp dir: " + tmpPath + " to: " + finalPath);
-            if(!fs.rename(tmpPath, finalPath)) {
-              throw new HiveException("Unable to commit result directory: " + finalPath);
-            }
+            LOG.info("Moving tmp dir: " + tmpPath + " to: " + finalPath);
+            renameOrMoveFiles(fs, tmpPath, finalPath);
             // Step2: Clean any temp files from finalPath
             Utilities.removeTempFiles(fs, finalPath);
           }
@@ -241,4 +239,35 @@
     }
     super.jobClose(hconf, success);
   }
+  
+  /**
+   * Renames src to dst; if dst already exists, moves the files inside src
+   * into dst one by one instead. A file whose name collides with an existing
+   * file in dst gets "_1", "_2", ... appended until a free name is found.
+   *
+   * @param fs the FileSystem that both src and dst live on.
+   * @param src the source directory
+   * @param dst the target directory
+   * @throws IOException if the FileSystem rejects a rename
+   */
+  public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws IOException {
+    if (!fs.exists(dst)) {
+      // Fast path: target absent, rename the whole directory in one shot.
+      // rename() reports failure via its boolean return rather than an
+      // exception; ignoring it (as before) would silently drop the output.
+      if (!fs.rename(src, dst)) {
+        throw new IOException("Unable to rename " + src + " to " + dst);
+      }
+    } else {
+      // Target exists: move file by file, de-duplicating names with a
+      // numeric suffix.
+      // NOTE(review): exists()/rename() is not atomic, so concurrent writers
+      // could still race on the same destination name — confirm callers
+      // serialize commits to finalPath.
+      FileStatus[] files = fs.listStatus(src);
+      for (FileStatus file : files) {
+        Path srcFilePath = file.getPath();
+        String fileName = srcFilePath.getName();
+        Path dstFilePath = new Path(dst, fileName);
+        for (int suffix = 1; fs.exists(dstFilePath); suffix++) {
+          dstFilePath = new Path(dst, fileName + "_" + suffix);
+        }
+        if (!fs.rename(srcFilePath, dstFilePath)) {
+          throw new IOException("Unable to rename " + srcFilePath + " to " + dstFilePath);
+        }
+      }
+    }
+  }
 }

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union3.q?rev=751583&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union3.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union3.q Mon Mar  9 05:09:22 2009
@@ -0,0 +1,35 @@
+explain
+SELECT *
+FROM (
+  SELECT 1 AS id
+  FROM (SELECT * FROM src LIMIT 1) s1
+  CLUSTER BY id
+  UNION ALL
+  SELECT 2 AS id
+  FROM (SELECT * FROM src LIMIT 1) s1
+  CLUSTER BY id
+  UNION ALL
+  SELECT 3 AS id
+  FROM (SELECT * FROM src LIMIT 1) s2
+  UNION ALL
+  SELECT 4 AS id
+  FROM (SELECT * FROM src LIMIT 1) s2
+) a;
+
+
+SELECT *
+FROM (
+  SELECT 1 AS id
+  FROM (SELECT * FROM src LIMIT 1) s1
+  CLUSTER BY id
+  UNION ALL
+  SELECT 2 AS id
+  FROM (SELECT * FROM src LIMIT 1) s1
+  CLUSTER BY id
+  UNION ALL
+  SELECT 3 AS id
+  FROM (SELECT * FROM src LIMIT 1) s2
+  UNION ALL
+  SELECT 4 AS id
+  FROM (SELECT * FROM src LIMIT 1) s2
+) a;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out?rev=751583&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out Mon Mar  9 05:09:22 2009
@@ -0,0 +1,222 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY
(TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT
(TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE))
(TOK_SELECT (TOK_SELEXPR 1 id)) (TOK_CLUSTERBY (TOK_COLREF id)))) (TOK_UNION (TOK_QUERY (TOK_FROM
(TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s1)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 2 id)) (TOK_CLUSTERBY (TOK_COLREF id))))
(TOK_UNION (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT
(TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT
1))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 3
id)))) (TOK_QUERY (TOK_FROM (TOK_S
 UBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE))
(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) s2)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 4 id))))))) a)) (TOK_INSERT (TOK_DESTINATION
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 is a root stage
+  Stage-4 is a root stage
+  Stage-5 depends on stages: Stage-4
+  Stage-6 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2-subquery1:a-subquery2-subquery1:s1:src 
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              Limit
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: 0
+                        type: string
+                        expr: 1
+                        type: string
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: 2
+                    type: int
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                    name: binary_table
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        /data/users/zshao/sync/apache-trunk-HIVE-308/build/ql/tmp/249317218/968169738.10002

+          Reduce Output Operator
+            key expressions:
+                  expr: 0
+                  type: int
+            sort order: +
+            Map-reduce partition columns:
+                  expr: 0
+                  type: int
+            tag: -1
+            value expressions:
+                  expr: 0
+                  type: int
+      Reduce Operator Tree:
+        Extract
+              Select Operator
+                expressions:
+                      expr: 0
+                      type: int
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2-subquery2-subquery2:a-subquery2-subquery2-subquery2:s2:src 
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              Limit
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: 0
+                        type: string
+                        expr: 1
+                        type: string
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: 4
+                    type: int
+                    Select Operator
+                      expressions:
+                            expr: 0
+                            type: int
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:a-subquery1:s1:src 
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              Limit
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: 0
+                        type: string
+                        expr: 1
+                        type: string
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: 1
+                    type: int
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+                    name: binary_table
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        /data/users/zshao/sync/apache-trunk-HIVE-308/build/ql/tmp/249317218/968169738.10003

+          Reduce Output Operator
+            key expressions:
+                  expr: 0
+                  type: int
+            sort order: +
+            Map-reduce partition columns:
+                  expr: 0
+                  type: int
+            tag: -1
+            value expressions:
+                  expr: 0
+                  type: int
+      Reduce Operator Tree:
+        Extract
+            Select Operator
+              expressions:
+                    expr: 0
+                    type: int
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2-subquery2-subquery1:a-subquery2-subquery2-subquery1:s2:src 
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              Limit
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: 0
+                        type: string
+                        expr: 1
+                        type: string
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: 3
+                    type: int
+                    Select Operator
+                      expressions:
+                            expr: 0
+                            type: int
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+3
+4
+2
+1



Mime
View raw message