hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1652340 - /hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
Date Fri, 16 Jan 2015 04:46:43 GMT
Author: xuefu
Date: Fri Jan 16 04:46:42 2015
New Revision: 1652340

URL: http://svn.apache.org/r1652340
Log:
HIVE-9367: CombineFileInputFormatShim#getDirIndices is expensive (Jimmy via Xuefu)

Modified:
    hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java

Modified: hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java?rev=1652340&r1=1652339&r2=1652340&view=diff
==============================================================================
--- hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java (original)
+++ hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java Fri Jan 16 04:46:42 2015
@@ -24,10 +24,8 @@ import java.lang.reflect.Constructor;
 import java.net.URI;
 import java.security.AccessControlException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.lang.ArrayUtils;
@@ -53,8 +51,6 @@ import org.apache.hadoop.mapred.lib.Comb
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.util.Progressable;
 
-import com.google.common.primitives.Longs;
-
 /**
  * Base implemention for shims against secure Hadoop 0.20.3/0.23.
  */
@@ -325,16 +321,9 @@ public abstract class HadoopShimsSecure
       ArrayList<InputSplitShim> inputSplitShims = new ArrayList<InputSplitShim>();
       for (int pos = 0; pos < splits.length; pos++) {
         CombineFileSplit split = (CombineFileSplit) splits[pos];
-        Set<Integer> dirIndices = getDirIndices(split.getPaths(), job);
-        if (dirIndices.size() != split.getPaths().length) {
-          List<Path> prunedPaths = prune(dirIndices, Arrays.asList(split.getPaths()));
-          List<Long> prunedStartOffsets = prune(dirIndices, Arrays.asList(
-            ArrayUtils.toObject(split.getStartOffsets())));
-          List<Long> prunedLengths = prune(dirIndices, Arrays.asList(
-            ArrayUtils.toObject(split.getLengths())));
-          inputSplitShims.add(new InputSplitShim(job, prunedPaths.toArray(new Path[prunedPaths.size()]),
-            Longs.toArray(prunedStartOffsets),
-            Longs.toArray(prunedLengths), split.getLocations()));
+        if (split.getPaths().length > 0) {
+          inputSplitShims.add(new InputSplitShim(job, split.getPaths(),
+            split.getStartOffsets(), split.getLengths(), split.getLocations()));
         }
       }
       return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]);
@@ -354,6 +343,27 @@ public abstract class HadoopShimsSecure
       return new CombineFileRecordReader(job, cfSplit, reporter, rrClass);
     }
 
+    @Override
+    protected FileStatus[] listStatus(JobConf job) throws IOException {
+      FileStatus[] result = super.listStatus(job);
+      boolean foundDir = false;
+      for (FileStatus stat: result) {
+        if (stat.isDir()) {
+          foundDir = true;
+          break;
+        }
+      }
+      if (!foundDir) {
+        return result;
+      }
+      ArrayList<FileStatus> files = new ArrayList<FileStatus>();
+      for (FileStatus stat: result) {
+        if (!stat.isDir()) {
+          files.add(stat);
+        }
+      }
+      return files.toArray(new FileStatus[files.size()]);
+    }
   }
 
   @Override
@@ -400,33 +410,6 @@ public abstract class HadoopShimsSecure
     LOG.debug("Return value is :" + retval);
   }
 
-  /**
-   * CombineFileInputFormat sometimes returns directories as splits, need to prune them.
-   */
-  private static Set<Integer> getDirIndices(Path[] paths, JobConf conf) throws IOException {
-    Set<Integer> result = new HashSet<Integer>();
-    for (int i = 0; i < paths.length; i++) {
-      FileSystem fs = paths[i].getFileSystem(conf);
-      if (!fs.isFile(paths[i])) {
-        result.add(i);
-      }
-    }
-    return result;
-  }
-
-  private static <K> List<K> prune(Set<Integer> indicesToPrune, List<K> elms) {
-    List<K> result = new ArrayList<K>();
-    int i = 0;
-    for (K elm : elms) {
-      if (indicesToPrune.contains(i)) {
-        continue;
-      }
-      result.add(elm);
-      i++;
-    }
-    return result;
-  }
-
   private static String[] dedup(String[] locations) throws IOException {
     Set<String> dedup = new HashSet<String>();
     Collections.addAll(dedup, locations);



Mime
View raw message