hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r941153 - in /hadoop/hive/trunk: ./ common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/io/
Date Wed, 05 May 2010 05:25:15 GMT
Author: namit
Date: Wed May  5 05:25:14 2010
New Revision: 941153

URL: http://svn.apache.org/viewvc?rev=941153&view=rev
Log:
HIVE-1328. make mapred.input.dir.recursive work for select *
(John Sichi via namit)


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=941153&r1=941152&r2=941153&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed May  5 05:25:14 2010
@@ -410,6 +410,9 @@ Trunk -  Unreleased
     HIVE-1329. alter table to external does not change table_type to external
     (John Sichi via namit)
 
+    HIVE-1328. make mapred.input.dir.recursive work for select *
+    (John Sichi via namit)
+
 Release 0.5.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=941153&r1=941152&r2=941153&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Wed May  5 05:25:14 2010
@@ -20,10 +20,12 @@ package org.apache.hadoop.hive.common;
 
 import java.io.IOException;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
@@ -167,4 +169,26 @@ public final class FileUtils {
     return sb.toString();
   }
 
+  /**
+   * Recursively lists status for all files starting from a particular
+   * directory (or individual file as base case).
+   *
+   * @param fs file system
+   *
+   * @param fileStatus starting point in file system
+   *
+   * @param results receives enumeration of all files found
+   */
+  public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
+    List<FileStatus> results)
+    throws IOException {
+
+    if (fileStatus.isDir()) {
+      for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
+        listStatusRecursively(fs, stat, results);
+      }
+    } else {
+      results.add(fileStatus);
+    }
+  }
 }

Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=941153&r1=941152&r2=941153&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed May  5 05:25:14 2010
@@ -96,6 +96,7 @@ public class HiveConf extends Configurat
     HADOOPFS("fs.default.name", "file:///"),
     HADOOPMAPFILENAME("map.input.file", null),
     HADOOPMAPREDINPUTDIR("mapred.input.dir", null),
+    HADOOPMAPREDINPUTDIRRECURSIVE("mapred.input.dir.recursive", false),
     HADOOPJT("mapred.job.tracker", "local"),
     HADOOPNUMREDUCERS("mapred.reduce.tasks", 1),
     HADOOPJOBNAME("mapred.job.name", null),

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=941153&r1=941152&r2=941153&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Wed May  5 05:25:14 2010
@@ -34,6 +34,8 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -202,7 +204,7 @@ public class FetchOperator implements Se
           if (isNativeTable) {
             FileSystem fs = currPath.getFileSystem(job);
             if (fs.exists(currPath)) {
-              FileStatus[] fStats = fs.listStatus(currPath);
+              FileStatus[] fStats = listStatusUnderPath(fs, currPath);
               for (FileStatus fStat : fStats) {
                 if (fStat.getLen() > 0) {
                   tblDataDone = true;
@@ -237,7 +239,7 @@ public class FetchOperator implements Se
         prt = iterPartDesc.next();
       FileSystem fs = nxt.getFileSystem(job);
       if (fs.exists(nxt)) {
-        FileStatus[] fStats = fs.listStatus(nxt);
+        FileStatus[] fStats = listStatusUnderPath(fs, nxt);
         for (FileStatus fStat : fStats) {
           if (fStat.getLen() > 0) {
             currPath = nxt;
@@ -395,4 +397,30 @@ public class FetchOperator implements Se
           + org.apache.hadoop.util.StringUtils.stringifyException(e));
     }
   }
+
+  /**
+   * Lists status for all files under a given path.  Whether or not
+   * this is recursive depends on the setting of
+   * job configuration parameter mapred.input.dir.recursive.
+   *
+   * @param fs file system
+   *
+   * @param p path in file system
+   *
+   * @return list of file status entries
+   */
+  private FileStatus[] listStatusUnderPath(FileSystem fs, Path p)
+  throws IOException {
+    HiveConf hiveConf = new HiveConf(job, FetchOperator.class);
+    boolean recursive = 
+      hiveConf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE);
+    if (!recursive) {
+      return fs.listStatus(p);
+    }
+    List<FileStatus> results = new ArrayList<FileStatus>();
+    for (FileStatus stat : fs.listStatus(p)) {
+      FileUtils.listStatusRecursively(fs, stat, results);
+    }
+    return results.toArray(new FileStatus[results.size()]);
+  }
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=941153&r1=941152&r2=941153&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Wed May  5 05:25:14 2010
@@ -40,6 +40,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.ql.exec.ExecMapper;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -104,24 +105,6 @@ public class BucketizedHiveInputFormat<K
         reporter);
   }
 
-  protected FileStatus[] listStatus(FileSystem fs, FileStatus fileStatus)
-      throws IOException {
-    ArrayList<FileStatus> result = new ArrayList<FileStatus>();
-
-    if (fileStatus.isDir()) {
-      for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
-        for (FileStatus retStat : listStatus(fs, stat)) {
-          result.add(retStat);
-        }
-      }
-    } else {
-      result.add(fileStatus);
-    }
-
-    return result.toArray(new FileStatus[result.size()]);
-
-  }
-
   protected FileStatus[] listStatus(JobConf job, Path path) throws IOException {
     ArrayList<FileStatus> result = new ArrayList<FileStatus>();
     List<IOException> errors = new ArrayList<IOException>();
@@ -134,9 +117,7 @@ public class BucketizedHiveInputFormat<K
       errors.add(new IOException("Input Pattern " + path + " matches 0 files"));
     } else {
       for (FileStatus globStat : matches) {
-        for (FileStatus retStat : listStatus(fs, globStat)) {
-          result.add(retStat);
-        }
+        FileUtils.listStatusRecursively(fs, globStat, result);
       }
     }
 



Mime
View raw message