hadoop-common-commits mailing list archives

From: omal...@apache.org
Subject: svn commit: r588771 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/filecache/ src/java/org/apache/hadoop/mapred/ src/java/org/apache/hadoop/util/ src/test/org/apache/hadoop/util/ src/test/resources/
Date: Fri, 26 Oct 2007 20:39:32 GMT
Author: omalley
Date: Fri Oct 26 13:39:31 2007
New Revision: 588771

URL: http://svn.apache.org/viewvc?rev=588771&view=rev
Log:
Reverting HADOOP-1622, because it broke map/reduce.

Removed:
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/JarUtils.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/util/TestJarUtils.java
    lucene/hadoop/trunk/src/test/resources/
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/build.xml
    lucene/hadoop/trunk/src/java/org/apache/hadoop/filecache/DistributedCache.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/RunJar.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Oct 26 13:39:31 2007
@@ -142,9 +142,6 @@
     HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
     (Hairong Kuang via dhruba)
 
-    HADOOP-1622.  Permit multiple jar files to be added to a job.
-    (Dennis Kubes via cutting)
-
   OPTIMIZATIONS
 
     HADOOP-1910.  Reduce the number of RPCs that DistributedFileSystem.create()

Modified: lucene/hadoop/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/build.xml?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Fri Oct 26 13:39:31 2007
@@ -57,11 +57,8 @@
             value="${build.dir}/c++-examples/${build.platform}"/>
 
   <property name="test.src.dir" value="${basedir}/src/test"/>
-  <property name="test.resources.dir" value="${test.src.dir}/resources" />
-  <property name="test.src.dir" value="${basedir}/src/test"/>
   <property name="test.build.dir" value="${build.dir}/test"/>
   <property name="test.generated.dir" value="${test.build.dir}/src"/>
-  <property name="test.build.resources" value="${test.build.dir}/resources"/>
   <property name="test.build.data" value="${test.build.dir}/data"/>
   <property name="test.cache.data" value="${test.build.dir}/cache"/>
   <property name="test.debug.data" value="${test.build.dir}/debug"/>
@@ -139,7 +136,6 @@
   <target name="init">
     <mkdir dir="${build.dir}"/>
     <mkdir dir="${build.classes}"/>
-    <mkdir dir="${test.build.resources}" />
     <mkdir dir="${build.src}"/>
     <mkdir dir="${build.webapps}/task/WEB-INF"/>
     <mkdir dir="${build.webapps}/job/WEB-INF"/>
@@ -472,12 +468,6 @@
     <copy file="${test.src.dir}/org/apache/hadoop/mapred/test.zip" todir="${test.cache.data}"/>
     <copy file="${test.src.dir}/org/apache/hadoop/dfs/hadoop-12-dfs-dir.tgz" todir="${test.cache.data}"/>
     <copy file="${test.src.dir}/org/apache/hadoop/dfs/hadoop-12-dfs-dir.txt" todir="${test.cache.data}"/>
-    <delete dir="${test.debug.data}"/>
-    <mkdir dir="${test.debug.data}"/>
-    <copy file="${test.src.dir}/org/apache/hadoop/mapred/testscript.txt" todir="${test.debug.data}"/>
-    <copy todir="${test.build.resources}">
-      <fileset dir="${test.resources.dir}"/>
-    </copy>
   </target>
 
   <!-- ================================================================== -->
@@ -493,7 +483,6 @@
            fork="yes" maxmemory="256m" dir="${basedir}" timeout="${test.timeout}"
       errorProperty="tests.failed" failureProperty="tests.failed">
       <sysproperty key="test.build.data" value="${test.build.data}"/>
-      <sysproperty key="test.build.resources" value="${test.build.resources}"/>
       <sysproperty key="test.cache.data" value="${test.cache.data}"/>    	
       <sysproperty key="test.debug.data" value="${test.debug.data}"/>
       <sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/filecache/DistributedCache.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/filecache/DistributedCache.java?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/filecache/DistributedCache.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/filecache/DistributedCache.java Fri Oct 26 13:39:31 2007
@@ -290,7 +290,7 @@
       if (isArchive) {
         String tmpArchive = parchive.toString().toLowerCase();
         if (tmpArchive.endsWith(".jar")) {
-          JarUtils.unJar(new File(parchive.toString()), new File(parchive
+          RunJar.unJar(new File(parchive.toString()), new File(parchive
                                                                .getParent().toString()));
         } else if (tmpArchive.endsWith(".zip")) {
           FileUtil.unZip(new File(parchive.toString()), new File(parchive

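For reference, the localization path in DistributedCache chooses an unpacker by file extension: after this revert, .jar archives go through the restored RunJar.unJar while .zip archives still go through FileUtil.unZip. A minimal self-contained sketch of that dispatch (the class and method names here are illustrative, not part of the patch):

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.fs.FileUtil;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.util.RunJar;

    // Illustrative sketch of the post-revert dispatch; `parchive` is the
    // localized archive path, as in the hunk above.
    public class UnpackDispatchSketch {
      static void unpack(Path parchive) throws IOException {
        String lower = parchive.toString().toLowerCase();
        File archive = new File(parchive.toString());
        File targetDir = new File(parchive.getParent().toString());
        if (lower.endsWith(".jar")) {
          // Restored call: previously JarUtils.unJar under HADOOP-1622.
          RunJar.unJar(archive, targetDir);
        } else if (lower.endsWith(".zip")) {
          FileUtil.unZip(archive, targetDir);
        }
      }
    }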
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java Fri Oct 26 13:39:31 2007
@@ -21,7 +21,6 @@
 import java.io.BufferedWriter;
 import java.io.DataInput;
 import java.io.DataOutput;
-import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -32,11 +31,9 @@
 import java.net.URI;
 import java.net.URL;
 import java.net.URLConnection;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
@@ -47,7 +44,6 @@
 import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -60,7 +56,6 @@
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.mapred.TaskInProgress;
 import org.apache.hadoop.net.NetUtils;
-import org.apache.hadoop.util.JarUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -429,115 +424,8 @@
     JobConf job = new JobConf(jobFile);
     return submitJob(job);
   }
-
-
-  /**
-   * Creates a mapreduce job jar file from all of the mapreduce job resources.
-   * 
-   * @param job The current job configuration.
-   * @param jobId The current job id.
-   * 
-   * @return The file path to the finished job jar.
-   * @throws IOException If an error occurs while creating the job jar file.
-   */
-  private File createJobJar(JobConf job, String jobId) 
-    throws IOException {
-    
-    // get both jar and jars as they can be set through either config
-    String[] resources = job.getJobResources();
-    boolean hasResources = (resources != null && resources.length > 0);   
-    File jobJarFile = null;
     
-    // check for either a single or multiple jars
-    if (hasResources) {
-
-      // allow resources to be found through the classpath, absolute path, 
-      // a directory or through a containing jar file
-      List<File> jarList = new ArrayList<File>();
-      for (int i = 0; i < resources.length; i++) {
-        
-        // get the current resource
-        String current = resources[i];
-        if (current != null && current.length() > 0) {
-          
-          // create a file from the current resource and see if it exists
-          File currentFile = new File(current);
-          boolean exists = currentFile.exists();
-          
-          // if the resource is not an absolute path to a file
-          if (!exists) {
-
-            // try converting it to a classname
-            try {
-              
-              // try to find the containing jar on the classpath
-              Class cls = Class.forName(current);
-              String jar = JarUtils.findContainingJar(cls);
-              if (jar != null) {
-                currentFile = new File(jar);
-                if (currentFile.exists()) {
-                  jarList.add(new File(jar));
-                  continue;
-                }
-              }
-            }
-            catch (ClassNotFoundException e) {
-              // do nothing, not a classname
-            }
- 
-            // try to find a resource on the classpath that matches, should be
-            // a jar but will technically find any matching resource
-            String jar = JarUtils.findJar(getClass(), current);
-            if (jar != null) {
-              currentFile = new File(jar);
-              if (currentFile.exists()) {
-                jarList.add(new File(jar));
-              }
-            }
-
-          }
-          else if (exists) {            
-            // the resource is an existing file or directory
-            jarList.add(new File(current));
-          }
-        }
-      }
-      
-      // get the list of final resources
-      int numResources = jarList.size();
-      File[] jarResources = ((numResources == 0) ? new File[0] : 
-        (File[])jarList.toArray(new File[jarList.size()]));
-      
-      // see if we are dealing with a single jar file
-      boolean hasSingleJar = false;
-      if (numResources == 1) {
-        File testJar =jarResources[0];
-        if (testJar.exists() && testJar.isFile()) {
-          hasSingleJar = true;
-          jobJarFile = testJar;
-        }
-      }
-      
-      // we only jar up if there is more than one resource or if there is a 
-      // single resource but it is not a jar file (i.e. it is a directory)
-      if (numResources > 1 || (numResources == 1 && !hasSingleJar) ){
- 
-        // create an jartmp directory in the hadoop.tmp.dir
-        File tmpDir = new File(job.get("hadoop.tmp.dir"));
-
-        // create a complete job jar file from the unjar directory contents
-        // in the system temp directory and delete on exit
-        jobJarFile = FileUtil.createLocalTempFile(tmpDir, jobId + ".job.", true);
-        boolean uncompress = getConf().getBoolean("mapred.job.resources.uncompress", 
-          true);
-        JarUtils.jarAll(jarResources, jobJarFile, uncompress);
-      }
-    }
-    
-    // return the finished job jar file path
-    return jobJarFile;
-  }
-  
+   
   /**
    * Submit a job to the MR system.
    * This returns a handle to the {@link RunningJob} which can be used to track
@@ -592,29 +480,23 @@
       }
       DistributedCache.setFileTimestamps(job, fileTimestamps.toString());
     }
-
-    // create the job jar file from all job jars
-    File jobJarFile = createJobJar(job, jobId);
+       
+    String originalJarPath = job.getJar();
     short replication = (short)job.getInt("mapred.submit.replication", 10);
-    
-    // if we have a job jar file
-    if (jobJarFile != null) {
-    
-      // set the job name to the job jar file name if no name is set
-      if ("".equals(job.getJobName())){        
-        String jobName = jobJarFile.getName();
-        job.setJobName(new Path(jobName).getName());
+
+    if (originalJarPath != null) {           // copy jar to JobTracker's fs
+      // use jar name if job is not named. 
+      if ("".equals(job.getJobName())){
+        job.setJobName(new Path(originalJarPath).getName());
       }
-      
-      // copy the merged job jar to the job filesystem and set replication
-      job.setJar(submitJarFile.toString());      
-      fs.copyFromLocalFile(new Path(jobJarFile.toString()), submitJarFile);
+      job.setJar(submitJarFile.toString());
+      fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
       fs.setReplication(submitJarFile, replication);
     } else {
       LOG.warn("No job jar file set.  User classes may not be found. "+
                "See JobConf(Class) or JobConf#setJar(String).");
     }
-    
+
     // Set the user's name and working directory
     String user = System.getProperty("user.name");
     job.setUser(user != null ? user : "Dr Who");
@@ -1072,7 +954,7 @@
   }
     
   public int run(String[] argv) throws Exception {
-
+    // process arguments
     String submitJobFile = null;
     String jobid = null;
     String taskid = null;

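The net effect of the JobClient change is a return to single-jar submission: read mapred.jar via JobConf.getJar(), copy it to the JobTracker's filesystem, and raise its replication. A minimal sketch of the restored step, assuming fs and submitJarFile come from the enclosing submitJob logic (the wrapper class and method are illustrative):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;

    // Sketch of the restored single-jar submission step; `fs` and
    // `submitJarFile` are assumed to be supplied by the caller.
    class SubmitJarSketch {
      static void submitJar(JobConf job, FileSystem fs, Path submitJarFile)
          throws IOException {
        String originalJarPath = job.getJar();  // value of mapred.jar
        short replication = (short) job.getInt("mapred.submit.replication", 10);
        if (originalJarPath != null) {
          // Use the jar name if the job is not named.
          if ("".equals(job.getJobName())) {
            job.setJobName(new Path(originalJarPath).getName());
          }
          job.setJar(submitJarFile.toString());
          fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
          fs.setReplication(submitJarFile, replication);
        }
      }
    }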
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java Fri Oct 26 13:39:31 2007
@@ -21,14 +21,12 @@
 
 import java.io.IOException;
 
-import java.net.URL;
 import java.util.StringTokenizer;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Enumeration;
-import java.util.LinkedHashSet;
-import java.util.Set;
 
+import java.net.URL;
 import java.net.URLDecoder;
 
 import org.apache.commons.logging.Log;
@@ -39,18 +37,13 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.io.*;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionCodec;
 
 import org.apache.hadoop.mapred.lib.IdentityMapper;
 import org.apache.hadoop.mapred.lib.IdentityReducer;
 import org.apache.hadoop.mapred.lib.HashPartitioner;
-import org.apache.hadoop.util.JarUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Tool;
 
@@ -174,36 +167,6 @@
   }
 
   /**
-   * <p>Returns an array of the unique resources for this map reduce job.  These
-   * are resources added with the {@link #addJobResource(String)} method and are
-   * not default or final configuration resources.
-   * 
-   * @return String[] A array of unique job resources in priority order.
-   */
-  public String[] getJobResources() {
-    
-    // get the various jar config settings
-    String jar = get("mapred.jar");
-    String resources = get("mapred.job.resources");
-    
-    // add additional resources first, followed by setJar
-    Set<String> allResources = new LinkedHashSet<String>();
-    if (resources != null && resources.length() > 0) {
-      String[] resAr = resources.split(",");
-      for (int i = 0; i < resAr.length; i++) {
-        allResources.add(resAr[i].trim());
-      }
-    }
-    if (jar != null && jar.length() > 0) {
-      allResources.add(jar);
-    }
-    
-    // return as a string array
-    return (String[])allResources.toArray(new String[allResources.size()]);
-  }
-
-
-  /**
    * Checks if <b>mapred-default.xml</b> is on the CLASSPATH, if so
    * it warns the user and loads it as a {@link Configuration} resource.
    * 
@@ -219,42 +182,9 @@
   }
   
   /**
-   * <p>Adds an additional resource to the mapreduce job.  A resource can be
-   * a file on the local filesystem, the name of a class contained in a jar in 
-   * the classpath, a jar that is on the classpath, or a directory on the local 
-   * file system.</p> 
-   * 
-   * <p>When a job is submitted to the MR system, all resources are merged into
-   * a single job.jar file.  Each resource takes priority over any previously
-   * added resources.  If there are any conflicts, resources added later will
-   * overwrite resources added earlier.</p>
-   * 
-   * <p>One thing to note is that empty directories inside of resource jars will 
-   * not be copied over to the merged job jar.</p>
-   * 
-   * @param resource The resource to be added to the mapreduce job.
-   */
-  public void addJobResource(String resource) {
-    
-    if (resource != null && resource.length() > 0) {
-      
-      String resources = get("mapred.job.resources");
-      if (resources != null && resources.length() > 0) {
-        resources = (resource + "," + resources);
-      }
-      else {
-        resources = resource;
-      }
-      
-      set("mapred.job.resources", resources);
-    }
-  }
-
-  /**
    * Get the user jar for the map-reduce job.
    * 
    * @return the user jar for the map-reduce job.
-   * @deprecated Use {@link #getJobResources()} instead.
    */
   public String getJar() { return get("mapred.jar"); }
   
@@ -262,7 +192,6 @@
    * Set the user jar for the map-reduce job.
    * 
    * @param jar the user jar for the map-reduce job.
-   * @deprecated Use {@link #addJobResource(String)} instead.
    */
   public void setJar(String jar) { set("mapred.jar", jar); }
   
@@ -272,9 +201,9 @@
    * @param cls the example class.
    */
   public void setJarByClass(Class cls) {
-    String jar = JarUtils.findContainingJar(cls);
+    String jar = findContainingJar(cls);
     if (jar != null) {
-      addJobResource(cls.toString());
+      setJar(jar);
     }   
   }
 
@@ -1360,5 +1289,6 @@
     }
     return null;
   }
+
 }
 

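With addJobResource and getJobResources removed, setJarByClass again resolves the single jar containing a given class (via JobConf's private findContainingJar) and records it in mapred.jar. A short usage sketch, assuming the class is loaded from a jar on the classpath:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.IdentityMapper;

    // Usage sketch: after the revert, user code is attached as one jar,
    // either set explicitly with setJar(path) or derived from a class.
    public class JarConfigSketch {
      public static void main(String[] args) {
        JobConf job = new JobConf();
        // Find the jar containing IdentityMapper and store it in mapred.jar.
        job.setJarByClass(IdentityMapper.class);
        System.out.println("mapred.jar = " + job.getJar());
      }
    }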
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Fri Oct 26 13:39:31 2007
@@ -71,7 +71,6 @@
 import org.apache.hadoop.metrics.jvm.JvmMetrics;
 import org.apache.hadoop.net.DNS;
 import org.apache.hadoop.util.DiskChecker;
-import org.apache.hadoop.util.JarUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.RunJar;
 import org.apache.hadoop.util.StringUtils;
@@ -613,7 +612,7 @@
               throw new IOException("Mkdirs failed to create " + workDir.toString());
             }
           }
-          JarUtils.unJar(new File(localJarFile.toString()), workDir);
+          RunJar.unJar(new File(localJarFile.toString()), workDir);
         }
         rjob.keepJobFiles = ((localJobConf.getKeepTaskFilesPattern() != null) ||
                              localJobConf.getKeepFailedTaskFiles());

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/util/RunJar.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/util/RunJar.java?rev=588771&r1=588770&r2=588771&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/util/RunJar.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/util/RunJar.java Fri Oct 26 13:39:31 2007
@@ -30,12 +30,42 @@
 
 /** Run a Hadoop job jar. */
 public class RunJar {
-  
-  /** @deprecated Use {@link JarUtils#unJar(File, File)} instead. */
-  @Deprecated
-  public static void unJar(File jarFile, File toDir)
-    throws IOException {
-    JarUtils.unJar(jarFile, toDir);
+
+  /** Unpack a jar file into a directory. */
+  public static void unJar(File jarFile, File toDir) throws IOException {
+    JarFile jar = new JarFile(jarFile);
+    try {
+      Enumeration entries = jar.entries();
+      while (entries.hasMoreElements()) {
+        JarEntry entry = (JarEntry)entries.nextElement();
+        if (!entry.isDirectory()) {
+          InputStream in = jar.getInputStream(entry);
+          try {
+            File file = new File(toDir, entry.getName());
+            if (!file.getParentFile().mkdirs()) {
+              if (!file.getParentFile().isDirectory()) {
+                throw new IOException("Mkdirs failed to create " + 
+                                      file.getParentFile().toString());
+              }
+            }
+            OutputStream out = new FileOutputStream(file);
+            try {
+              byte[] buffer = new byte[8192];
+              int i;
+              while ((i = in.read(buffer)) != -1) {
+                out.write(buffer, 0, i);
+              }
+            } finally {
+              out.close();
+            }
+          } finally {
+            in.close();
+          }
+        }
+      }
+    } finally {
+      jar.close();
+    }
   }
 
   /** Run a Hadoop job jar.  If the main class is not in the jar's manifest,
@@ -99,7 +129,7 @@
         }
       });
 
-    JarUtils.unJar(file, workDir);
+    unJar(file, workDir);
     
     ArrayList<URL> classPath = new ArrayList<URL>();
     classPath.add(new File(workDir+"/").toURL());

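The restored RunJar.unJar is a plain entry-by-entry stream copy. A small usage sketch that mirrors what TaskTracker does when localizing a job jar (both paths here are hypothetical):

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.util.RunJar;

    // Usage sketch for the restored helper: unpack a job jar into a fresh
    // working directory before building the task classpath.
    public class UnJarSketch {
      public static void main(String[] args) throws IOException {
        File jar = new File("/tmp/job.jar");       // hypothetical input jar
        File workDir = new File("/tmp/job-work");  // hypothetical unpack dir
        if (!workDir.mkdirs() && !workDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + workDir);
        }
        RunJar.unJar(jar, workDir);
      }
    }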