hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nzh...@apache.org
Subject svn commit: r950363 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Date Wed, 02 Jun 2010 04:23:38 GMT
Author: nzhang
Date: Wed Jun  2 04:23:38 2010
New Revision: 950363

URL: http://svn.apache.org/viewvc?rev=950363&view=rev
Log:
HIVE-1377. getPartitionDescFromPath() in CombineHiveInputFormat should handle matching by
path (Paul Yang via Ning Zhang)

Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=950363&r1=950362&r2=950363&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Jun  2 04:23:38 2010
@@ -473,6 +473,10 @@ Trunk -  Unreleased
     HIVE-1371. Bug in rcfilecat
     (He Yongqiang via namit)
 
+    HIVE-1377. getPartitionDescFromPath() in CombineHiveInputFormat should
+    handle matching by path	
+    (Paul Yang via Ning Zhang)
+
 Release 0.5.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=950363&r1=950362&r2=950363&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Wed Jun  2 04:23:38 2010
@@ -36,9 +36,9 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim;
 import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.compress.CompressionCodecFactory;
@@ -350,12 +350,30 @@ public class CombineHiveInputFormat<K ex
           part = entry.getValue();
           break;
         } else {
-          Path p = new Path(entry.getKey());
+          Path p = new Path(keyPath);
           String newP = p.toUri().getPath().toString();
           if (dirStr.startsWith(newP)) {
             part = entry.getValue();
             break;
           }
+          // This case handles the situation where dir is a fully qualified
+          // subdirectory of a path in pathToPartitionInfo. e.g.
+          // dir = hdfs://host:9000/user/warehouse/tableName/abc
+          // pathToPartitionInfo = {/user/warehouse/tableName : myPart}
+          // In such a case, just compare the path components.
+
+          // This could result in aliasing if we have a case where
+          // two entries in pathToPartitionInfo differ only by scheme
+          // or authority, but this problem exists anyway in the above checks.
+
+          // This check was precipitated by changes that allow recursive dirs
+          // in the input path, and an upcoming change to CombineFileInputFormat
+          // where the paths in splits no longer have the scheme and authority
+          // stripped out.
+          if (dirPath.startsWith(newP)) {
+            part = entry.getValue();
+            break;
+          }
         }
       }
     }



Mime
View raw message