hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From heyongqi...@apache.org
Subject svn commit: r980626 - in /hadoop/hive/branches/branch-0.6: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
Date Fri, 30 Jul 2010 01:10:20 GMT
Author: heyongqiang
Date: Fri Jul 30 01:10:20 2010
New Revision: 980626

URL: http://svn.apache.org/viewvc?rev=980626&view=rev
Log:
HIVE-1492. FileSinkOperator should remove duplicated files from the same task based on file sizes. (Ning Zhang via He Yongqiang)

Modified:
    hadoop/hive/branches/branch-0.6/CHANGES.txt
    hadoop/hive/branches/branch-0.6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java

Modified: hadoop/hive/branches/branch-0.6/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.6/CHANGES.txt?rev=980626&r1=980625&r2=980626&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.6/CHANGES.txt Fri Jul 30 01:10:20 2010
@@ -566,6 +566,9 @@ Release 0.6.0 -  Unreleased
     HIVE-1455. lateral view does not work with column pruning.
     (Paul Yang via He Yongqiang)
 
+    HIVE-1492. FileSinkOperator should remove duplicated files from the same task based on file sizes.
+    (Ning Zhang via He Yongqiang)
+
 Release 0.5.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/branches/branch-0.6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=980626&r1=980625&r2=980626&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hadoop/hive/branches/branch-0.6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Jul 30 01:10:20 2010
@@ -1135,12 +1135,26 @@ public final class Utilities {
         if (otherFile == null) {
           taskIdToFile.put(taskId, one);
         } else {
-          if (!fs.delete(one.getPath(), true)) {
+          // Compare the file sizes of all the attempt files for the same task, the largest win
+          // any attempt files could contain partial results (due to task failures or
+          // speculative runs), but the largest should be the correct one since the result
+          // of a successful run should never be smaller than a failed/speculative run.
+          FileStatus toDelete = null;
+          if (otherFile.getLen() >= one.getLen()) {
+            toDelete = one;
+          } else {
+            toDelete = otherFile;
+            taskIdToFile.put(taskId, one);
+          }
+          long len1 = toDelete.getLen();
+          long len2 = taskIdToFile.get(taskId).getLen();
+          if (!fs.delete(toDelete.getPath(), true)) {
             throw new IOException("Unable to delete duplicate file: "
-                + one.getPath() + ". Existing file: " + otherFile.getPath());
+                + toDelete.getPath() + ". Existing file: " + taskIdToFile.get(taskId).getPath());
           } else {
-            LOG.warn("Duplicate taskid file removed: " + one.getPath()
-                + ". Existing file: " + otherFile.getPath());
+            LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() + " with length "
+                + len1 + ". Existing file: " +  taskIdToFile.get(taskId).getPath()
+                + " with length " + len2);
           }
         }
       }



Mime
View raw message