hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1576112 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/common/ ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Mon, 10 Mar 2014 22:40:45 GMT
Author: hashutosh
Date: Mon Mar 10 22:40:44 2014
New Revision: 1576112

URL: http://svn.apache.org/r1576112
Log:
HIVE-6585 : bucket map join fails in presence of _SUCCESS file (Ashutosh Chauhan via Vikram Dixit)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
    hive/trunk/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=1576112&r1=1576111&r2=1576112&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Mon Mar 10 22:40:44 2014
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -297,7 +298,14 @@ public final class FileUtils {
       List<FileStatus> results) throws IOException {
 
     if (fileStatus.isDir()) {
-      for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
+      for (FileStatus stat : fs.listStatus(fileStatus.getPath(), new PathFilter() {
+
+        @Override
+        public boolean accept(Path p) {
+          String name = p.getName();
+          return !name.startsWith("_") && !name.startsWith(".");
+        }
+      })) {
         listStatusRecursively(fs, stat, results);
       }
     } else {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1576112&r1=1576111&r2=1576112&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Mon Mar 10 22:40:44 2014
@@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -91,7 +92,14 @@ public class BucketizedHiveInputFormat<K
     List<IOException> errors = new ArrayList<IOException>();
 
     FileSystem fs = dir.getFileSystem(job);
-    FileStatus[] matches = fs.globStatus(dir);
+    FileStatus[] matches = fs.globStatus(dir, new PathFilter() {
+
+      @Override
+      public boolean accept(Path p) {
+        String name = p.getName();
+        return !name.startsWith("_") && !name.startsWith(".");
+      }
+    });
     if (matches == null) {
       errors.add(new IOException("Input path does not exist: " + dir));
     } else if (matches.length == 0) {

Added: hive/trunk/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q?rev=1576112&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q Mon Mar 10 22:40:44 2014
@@ -0,0 +1,15 @@
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/bmjpathfilter;
+
+create table t1 (dt string) location '${system:test.tmp.dir}/bmjpathfilter/t1';
+Create table t2 (dt string) stored as orc; 
+dfs -touchz ${system:test.tmp.dir}/bmjpathfilter/t1/_SUCCESS;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; 
+SET hive.optimize.bucketmapjoin=true; 
+
+SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt);
+ 
+SET hive.optimize.bucketmapjoin=false;
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+dfs -rmr ${system:test.tmp.dir}/bmjpathfilter;

Added: hive/trunk/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out?rev=1576112&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out Mon Mar 10 22:40:44 2014
@@ -0,0 +1,26 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: Create table t2 (dt string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: Create table t2 (dt string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####



Mime
View raw message