hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject svn commit: r1077116 [1/2] - in /hadoop/common/branches/branch-0.20-security-patches/src: c++/task-controller/ c++/task-controller/tests/ docs/src/documentation/content/xdocs/ mapred/org/apache/hadoop/filecache/ mapred/org/apache/hadoop/mapred/ mapred/...
Date Fri, 04 Mar 2011 03:42:39 GMT
Author: omalley
Date: Fri Mar  4 03:42:38 2011
New Revision: 1077116

URL: http://svn.apache.org/viewvc?rev=1077116&view=rev
Log:
commit f4c85e6b0345142d773647da75bba7c528c201bc
Author: Hemanth Yamijala <yhemanth@friendchild-lm.(none)>
Date:   Thu Jan 21 22:10:58 2010 +0530

    MAPREDUCE-856 from https://issues.apache.org/jira/secure/attachment/12431040/MAPREDUCE-856-20090908-y20.txt
    
    +++ b/YAHOO-CHANGES.txt
    +    MAPREDUCE-856. Setup secure permissions for distributed cache files.
    +    (Vinod Kumar Vavilapalli via yhemanth)
    +

Added:
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapreduce/server/tasktracker/
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapreduce/server/tasktracker/Localizer.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestTrackerDistributedCacheManagerWithLinuxTaskController.java
Modified:
    hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/main.c
    hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.c
    hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.h
    hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/tests/test-task-controller.c
    hadoop/common/branches/branch-0.20-security-patches/src/docs/src/documentation/content/xdocs/cluster_setup.xml
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/filecache/TaskDistributedCacheManager.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/CleanupQueue.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/InterTrackerProtocol.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobClient.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobConf.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/MapTask.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/Task.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskController.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskRunner.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
    hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/pipes/Application.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestIsolationRunner.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestLocalizationWithLinuxTaskController.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestMiniMRWithDFSWithDistinctUsers.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestQueueManager.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/mapred/TestTaskTrackerLocalization.java

Modified: hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/main.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/c%2B%2B/task-controller/main.c?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/main.c (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/main.c Fri Mar  4 03:42:38 2011
@@ -105,10 +105,16 @@ int main(int argc, char **argv) {
   fprintf(LOGFILE, "main : user is %s\n", user_detail->pw_name);
 
   switch (command) {
+  case INITIALIZE_USER:
+    exit_code = initialize_user(user_detail->pw_name);
+    break;
   case INITIALIZE_JOB:
     job_id = argv[optind++];
     exit_code = initialize_job(job_id, user_detail->pw_name);
     break;
+  case INITIALIZE_DISTRIBUTEDCACHE:
+    exit_code = initialize_distributed_cache(user_detail->pw_name);
+    break;
   case LAUNCH_TASK_JVM:
     tt_root = argv[optind++];
     job_id = argv[optind++];

Modified: hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/c%2B%2B/task-controller/task-controller.c?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.c (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.c Fri Mar  4 03:42:38 2011
@@ -120,16 +120,26 @@ int check_variable_against_config(const 
 /**
  * Utility function to concatenate argB to argA using the concat_pattern
  */
-char *concatenate(const char *argA, const char *argB, char *concat_pattern,
-    char *return_path_name) {
-  if (argA == NULL || argB == NULL) {
-    fprintf(LOGFILE, "One of the arguments passed for %s in null.\n",
-        return_path_name);
-    return NULL;
+char *concatenate(char *concat_pattern, char *return_path_name, int numArgs,
+    ...) {
+  va_list ap;
+  va_start(ap, numArgs);
+  int strlen_args = 0;
+  char *arg = NULL;
+  int j;
+  for (j = 0; j < numArgs; j++) {
+    arg = va_arg(ap, char*);
+    if (arg == NULL) {
+      fprintf(LOGFILE, "One of the arguments passed for %s in null.\n",
+          return_path_name);
+      return NULL;
+    }
+    strlen_args += strlen(arg);
   }
+  va_end(ap);
 
   char *return_path = NULL;
-  int str_len = strlen(concat_pattern) + strlen(argA) + strlen(argB);
+  int str_len = strlen(concat_pattern) + strlen_args;
 
   return_path = (char *) malloc(sizeof(char) * (str_len + 1));
   if (return_path == NULL) {
@@ -137,43 +147,62 @@ char *concatenate(const char *argA, cons
     return NULL;
   }
   memset(return_path, '\0', str_len + 1);
-  snprintf(return_path, str_len, concat_pattern, argA, argB);
+  va_start(ap, numArgs);
+  vsnprintf(return_path, str_len, concat_pattern, ap);
+  va_end(ap);
   return return_path;
 }
 
 /**
- * Get the job-directory path from tt_root and job-id
+ * Get the job-directory path from tt_root, user name and job-id
+ */
+char *get_job_directory(const char * tt_root, const char *user,
+    const char *jobid) {
+  return concatenate(TT_JOB_DIR_PATTERN, "job_dir_path", 3, tt_root, user,
+      jobid);
+}
+
+/**
+ * Get the user directory of a particular user
+ */
+char *get_user_directory(const char *tt_root, const char *user) {
+  return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, tt_root, user);
+}
+
+/**
+ * Get the distributed cache directory for a particular user
  */
-char *get_job_directory(const char * tt_root, const char *jobid) {
-  return concatenate(tt_root, jobid, TT_JOB_DIR_PATTERN, "job_dir_path");
+char *get_distributed_cache_directory(const char *tt_root, const char *user) {
+  return concatenate(USER_DISTRIBUTED_CACHE_DIR_PATTERN, "dist_cache_path", 2,
+      tt_root, user);
 }
 
 char *get_job_work_directory(const char *job_dir) {
-  return concatenate(job_dir, "", JOB_DIR_TO_JOB_WORK_PATTERN,
-      "job_work_dir_path");
+  return concatenate(JOB_DIR_TO_JOB_WORK_PATTERN, "job_work_dir_path", 2,
+      job_dir, "");
 }
 /**
  * Get the attempt directory for the given attempt_id
  */
 char *get_attempt_directory(const char *job_dir, const char *attempt_id) {
-  return concatenate(job_dir, attempt_id, JOB_DIR_TO_ATTEMPT_DIR_PATTERN,
-      "attempt_dir_path");
+  return concatenate(JOB_DIR_TO_ATTEMPT_DIR_PATTERN, "attempt_dir_path", 2,
+      job_dir, attempt_id);
 }
 
 /*
  * Get the path to the task launcher file which is created by the TT
  */
 char *get_task_launcher_file(const char *job_dir, const char *attempt_dir) {
-  return concatenate(job_dir, attempt_dir, TASK_SCRIPT_PATTERN,
-      "task_script_path");
+  return concatenate(TASK_SCRIPT_PATTERN, "task_script_path", 2, job_dir,
+      attempt_dir);
 }
 
 /**
  * Get the log directory for the given attempt.
  */
 char *get_task_log_dir(const char *log_dir, const char *attempt_id) {
-  return concatenate(log_dir, attempt_id, ATTEMPT_LOG_DIR_PATTERN,
-      "task_log_dir");
+  return concatenate(ATTEMPT_LOG_DIR_PATTERN, "task_log_dir", 2, log_dir,
+      attempt_id);
 }
 
 /**
@@ -332,6 +361,17 @@ static int secure_path(const char *path,
     if (!process_path) {
       continue;
     }
+    if (compare_ownership(uid, gid, entry->fts_path) == 0) {
+      // already set proper permissions.
+      // This might happen with distributed cache.
+#ifdef DEBUG
+      fprintf(
+          LOGFILE,
+          "already has private permissions. Not trying to change again for %s",
+          entry->fts_path);
+#endif
+      continue;
+    }
 
     if (check_ownership(entry->fts_path) != 0) {
       fprintf(LOGFILE,
@@ -359,8 +399,8 @@ static int secure_path(const char *path,
  * Function to prepare the attempt directories for the task JVM.
  * This is done by changing the ownership of the attempt directory recursively
  * to the job owner. We do the following:
- *     *  sudo chown user:mapred -R taskTracker/jobcache/$jobid/$attemptid/
- *     *  sudo chmod 2770 -R taskTracker/jobcache/$jobid/$attemptid/
+ *  *  sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid/$attemptid/
+ *  *  sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid/$attemptid/
  */
 int prepare_attempt_directories(const char *job_id, const char *attempt_id,
     const char *user) {
@@ -395,7 +435,7 @@ int prepare_attempt_directories(const ch
   char **local_dir_ptr = local_dir;
   int failed = 0;
   while (*local_dir_ptr != NULL) {
-    job_dir = get_job_directory(*local_dir_ptr, job_id);
+    job_dir = get_job_directory(*local_dir_ptr, user, job_id);
     if (job_dir == NULL) {
       fprintf(LOGFILE, "Couldn't get job directory for %s.\n", job_id);
       failed = 1;
@@ -508,6 +548,20 @@ int get_user_details(const char *user) {
   return 0;
 }
 
+/**
+ * Compare ownership of a file with the given ids.
+ */
+int compare_ownership(uid_t uid, gid_t gid, char *path) {
+  struct stat filestat;
+  if (stat(path, &filestat) != 0) {
+    return UNABLE_TO_STAT_FILE;
+  }
+  if (uid == filestat.st_uid && gid == filestat.st_gid) {
+    return 0;
+  }
+  return 1;
+}
+
 /*
  * Function to check if the TaskTracker actually owns the file.
   */
@@ -527,6 +581,89 @@ int check_ownership(char *path) {
 }
 
 /**
+ * Function to initialize the user directories of a user.
+ * It does the following:
+ *     *  sudo chown user:mapred -R taskTracker/$user
+ *     *  sudo chmod 2570 -R taskTracker/$user
+ * This is done once per every user on the TaskTracker.
+ */
+int initialize_user(const char *user) {
+
+  if (user == NULL) {
+    fprintf(LOGFILE, "user passed is null.\n");
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+  if (get_user_details(user) < 0) {
+    fprintf(LOGFILE, "Couldn't get the user details of %s", user);
+    return INVALID_USER_NAME;
+  }
+
+  gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
+
+  char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
+  if (local_dir == NULL) {
+    fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
+    cleanup();
+    return INVALID_TT_ROOT;
+  }
+
+  char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
+#ifdef DEBUG
+  fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
+      full_local_dir_str);
+#endif
+
+  char *user_dir;
+  char **local_dir_ptr = local_dir;
+  int failed = 0;
+  while (*local_dir_ptr != NULL) {
+    user_dir = get_user_directory(*local_dir_ptr, user);
+    if (user_dir == NULL) {
+      fprintf(LOGFILE, "Couldn't get userdir directory for %s.\n", user);
+      failed = 1;
+      break;
+    }
+
+    struct stat filestat;
+    if (stat(user_dir, &filestat) != 0) {
+      if (errno == ENOENT) {
+#ifdef DEBUG
+        fprintf(LOGFILE, "user_dir %s doesn't exist. Not doing anything.\n",
+            user_dir);
+#endif
+      } else {
+        // stat failed because of something else!
+        fprintf(LOGFILE, "Failed to stat the user_dir %s\n",
+            user_dir);
+        failed = 1;
+        free(user_dir);
+        break;
+      }
+    } else if (secure_path(user_dir, user_detail->pw_uid, tasktracker_gid,
+        S_IRUSR | S_IXUSR | S_IRWXG, S_ISGID | S_IRUSR | S_IXUSR | S_IRWXG)
+        != 0) {
+      // No setgid on files and setgid on dirs, 570
+      fprintf(LOGFILE, "Failed to secure the user_dir %s\n",
+          user_dir);
+      failed = 1;
+      free(user_dir);
+      break;
+    }
+
+    local_dir_ptr++;
+    free(user_dir);
+  }
+  free(local_dir);
+  free(full_local_dir_str);
+  cleanup();
+  if (failed) {
+    return INITIALIZE_USER_FAILED;
+  }
+  return 0;
+}
+
+/**
  * Function to prepare the job directories for the task JVM.
  * We do the following:
  *     *  sudo chown user:mapred -R taskTracker/jobcache/$jobid
@@ -563,7 +700,7 @@ int initialize_job(const char *jobid, co
   char **local_dir_ptr = local_dir;
   int failed = 0;
   while (*local_dir_ptr != NULL) {
-    job_dir = get_job_directory(*local_dir_ptr, jobid);
+    job_dir = get_job_directory(*local_dir_ptr, user, jobid);
     if (job_dir == NULL) {
       fprintf(LOGFILE, "Couldn't get job directory for %s.\n", jobid);
       failed = 1;
@@ -608,6 +745,7 @@ int initialize_job(const char *jobid, co
               "job_work_dir %s doesn't exist. Not doing anything.\n",
               job_work_dir);
 #endif
+          free(job_work_dir);
         } else {
           // stat failed because of something else!
           fprintf(LOGFILE, "Failed to stat the job_work_dir %s\n",
@@ -641,6 +779,97 @@ int initialize_job(const char *jobid, co
 }
 
 /**
+ * Function to initialize the distributed cache files of a user.
+ * It does the following:
+ *     *  sudo chown user:mapred -R taskTracker/$user/distcache
+ *     *  sudo chmod 2570 -R taskTracker/$user/distcache
+ * This is done once per every JVM launch. Tasks reusing JVMs just create
+ * symbolic links themselves and so there isn't anything specific to do in
+ * that case.
+ * Sometimes, it happens that a task uses the whole or part of a directory
+ * structure in taskTracker/$user/distcache. In this case, some paths are
+ * already set proper private permissions by this same function called during
+ * a previous JVM launch. In the current invocation, we only do the
+ * chown/chmod operation of files/directories that are newly created by the
+ * TaskTracker (i.e. those that still are not owned by user:mapred)
+ */
+int initialize_distributed_cache(const char *user) {
+
+  if (user == NULL) {
+    fprintf(LOGFILE, "user passed is null.\n");
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+  if (get_user_details(user) < 0) {
+    fprintf(LOGFILE, "Couldn't get the user details of %s", user);
+    return INVALID_USER_NAME;
+  }
+
+  gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
+
+  char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
+  if (local_dir == NULL) {
+    fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
+    cleanup();
+    return INVALID_TT_ROOT;
+  }
+
+  char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
+#ifdef DEBUG
+  fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
+      full_local_dir_str);
+#endif
+
+  char *distcache_dir;
+  char **local_dir_ptr = local_dir;
+  int failed = 0;
+  while (*local_dir_ptr != NULL) {
+    distcache_dir = get_distributed_cache_directory(*local_dir_ptr, user);
+    if (distcache_dir == NULL) {
+      fprintf(LOGFILE, "Couldn't get distcache directory for %s.\n", user);
+      failed = 1;
+      break;
+    }
+
+    struct stat filestat;
+    if (stat(distcache_dir, &filestat) != 0) {
+      if (errno == ENOENT) {
+#ifdef DEBUG
+        fprintf(LOGFILE, "distcache_dir %s doesn't exist. Not doing anything.\n",
+            distcache_dir);
+#endif
+      } else {
+        // stat failed because of something else!
+        fprintf(LOGFILE, "Failed to stat the distcache_dir %s\n",
+            distcache_dir);
+        failed = 1;
+        free(distcache_dir);
+        break;
+      }
+    } else if (secure_path(distcache_dir, user_detail->pw_uid,
+        tasktracker_gid, S_IRUSR | S_IXUSR | S_IRWXG, S_ISGID | S_IRUSR
+            | S_IXUSR | S_IRWXG) != 0) {
+      // No setgid on files and setgid on dirs, 570
+      fprintf(LOGFILE, "Failed to secure the distcache_dir %s\n",
+          distcache_dir);
+      failed = 1;
+      free(distcache_dir);
+      break;
+    }
+
+    local_dir_ptr++;
+    free(distcache_dir);
+  }
+  free(local_dir);
+  free(full_local_dir_str);
+  cleanup();
+  if (failed) {
+    return INITIALIZE_DISTCACHE_FAILED;
+  }
+  return 0;
+}
+
+/**
  * Function used to initialize task. Prepares attempt_dir, jars_dir and
  * log_dir to be accessible by the child
  */
@@ -719,7 +948,7 @@ int run_task_as_user(const char * user, 
     goto cleanup;
   }
 
-  job_dir = get_job_directory(tt_root, jobid);
+  job_dir = get_job_directory(tt_root, user, jobid);
   if (job_dir == NULL) {
     fprintf(LOGFILE, "Couldn't obtain job_dir for %s in %s.\n", jobid, tt_root);
     exit_code = OUT_OF_MEMORY;

Modified: hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/c%2B%2B/task-controller/task-controller.h?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.h (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/task-controller.h Fri Mar  4 03:42:38 2011
@@ -37,7 +37,9 @@
 
 //command definitions
 enum command {
+  INITIALIZE_USER,
   INITIALIZE_JOB,
+  INITIALIZE_DISTRIBUTEDCACHE,
   LAUNCH_TASK_JVM,
   INITIALIZE_TASK,
   TERMINATE_TASK_JVM,
@@ -63,9 +65,15 @@ enum errorcodes {
   PREPARE_TASK_LOGS_FAILED, //16
   INVALID_TT_LOG_DIR, //17
   OUT_OF_MEMORY, //18
+  INITIALIZE_DISTCACHE_FAILED, //19
+  INITIALIZE_USER_FAILED, //20
 };
 
-#define TT_JOB_DIR_PATTERN "%s/taskTracker/jobcache/%s"
+#define USER_DIR_PATTERN "%s/taskTracker/%s"
+
+#define TT_JOB_DIR_PATTERN USER_DIR_PATTERN"/jobcache/%s"
+
+#define USER_DISTRIBUTED_CACHE_DIR_PATTERN USER_DIR_PATTERN"/distcache"
 
 #define JOB_DIR_TO_JOB_WORK_PATTERN "%s/work"
 
@@ -91,10 +99,14 @@ extern FILE *LOGFILE;
 int run_task_as_user(const char * user, const char *jobid, const char *taskid,
     const char *tt_root);
 
+int initialize_user(const char *user);
+
 int initialize_task(const char *jobid, const char *taskid, const char *user);
 
 int initialize_job(const char *jobid, const char *user);
 
+int initialize_distributed_cache(const char *user);
+
 int kill_user_task(const char *user, const char *task_pid, int sig);
 
 int prepare_attempt_directory(const char *attempt_dir, const char *user);

Modified: hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/tests/test-task-controller.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/c%2B%2B/task-controller/tests/test-task-controller.c?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/tests/test-task-controller.c (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/c++/task-controller/tests/test-task-controller.c Fri Mar  4 03:42:38 2011
@@ -75,7 +75,7 @@ void test_check_variable_against_config(
   }
 
   // Test the parsing of a multiple valued key from the config
-  char **values = (char **)get_values("mapred.local.dir");
+  char **values = (char **) get_values("mapred.local.dir");
   char **values_ptr = values;
   int i = 0;
   while (*values_ptr != NULL) {
@@ -111,11 +111,24 @@ void test_check_variable_against_config(
   rmdir(hadoop_conf_dir);
 }
 
+void test_get_user_directory() {
+  char *user_dir = (char *) get_user_directory("/tmp", "user");
+  printf("user_dir obtained is %s\n", user_dir);
+  int ret = 0;
+  if (strcmp(user_dir, "/tmp/taskTracker/user") != 0) {
+    ret = -1;
+  }
+  free(user_dir);
+  assert(ret == 0);
+}
+
 void test_get_job_directory() {
-  char *job_dir = (char *) get_job_directory("/tmp", "job_200906101234_0001");
+  char *job_dir = (char *) get_job_directory("/tmp", "user",
+      "job_200906101234_0001");
   printf("job_dir obtained is %s\n", job_dir);
   int ret = 0;
-  if (strcmp(job_dir, "/tmp/taskTracker/jobcache/job_200906101234_0001") != 0) {
+  if (strcmp(job_dir, "/tmp/taskTracker/user/jobcache/job_200906101234_0001")
+      != 0) {
     ret = -1;
   }
   free(job_dir);
@@ -123,30 +136,34 @@ void test_get_job_directory() {
 }
 
 void test_get_attempt_directory() {
-  char *attempt_dir = (char *) get_attempt_directory(
-      "/tmp/taskTracker/jobcache/job_200906101234_0001",
-      "attempt_200906112028_0001_m_000000_0");
+  char *job_dir = (char *) get_job_directory("/tmp", "user",
+      "job_200906101234_0001");
+  printf("job_dir obtained is %s\n", job_dir);
+  char *attempt_dir = (char *) get_attempt_directory(job_dir,
+      "attempt_200906101234_0001_m_000000_0");
   printf("attempt_dir obtained is %s\n", attempt_dir);
   int ret = 0;
   if (strcmp(
       attempt_dir,
-      "/tmp/taskTracker/jobcache/job_200906101234_0001/attempt_200906112028_0001_m_000000_0")
+      "/tmp/taskTracker/user/jobcache/job_200906101234_0001/attempt_200906101234_0001_m_000000_0")
       != 0) {
     ret = -1;
   }
+  free(job_dir);
   free(attempt_dir);
   assert(ret == 0);
 }
 
 void test_get_task_launcher_file() {
-  char *task_file = (char *) get_task_launcher_file(
-      "/tmp/taskTracker/jobcache/job_200906101234_0001",
+  char *job_dir = (char *) get_job_directory("/tmp", "user",
+      "job_200906101234_0001");
+  char *task_file = (char *) get_task_launcher_file(job_dir,
       "attempt_200906112028_0001_m_000000_0");
   printf("task_file obtained is %s\n", task_file);
   int ret = 0;
   if (strcmp(
       task_file,
-      "/tmp/taskTracker/jobcache/job_200906101234_0001/attempt_200906112028_0001_m_000000_0/taskjvm.sh")
+      "/tmp/taskTracker/user/jobcache/job_200906101234_0001/attempt_200906112028_0001_m_000000_0/taskjvm.sh")
       != 0) {
     ret = -1;
   }
@@ -168,13 +185,27 @@ void test_get_task_log_dir() {
 }
 
 int main(int argc, char **argv) {
-  printf("Starting tests\n");
+  printf("\nStarting tests\n");
   LOGFILE = stdout;
+
+  printf("\nTesting check_variable_against_config()\n");
   test_check_variable_against_config();
+
+  printf("\nTesting get_user_directory()\n");
+  test_get_user_directory();
+
+  printf("\nTesting get_job_directory()\n");
   test_get_job_directory();
+
+  printf("\nTesting get_attempt_directory()\n");
   test_get_attempt_directory();
+
+  printf("\nTesting get_task_launcher_file()\n");
   test_get_task_launcher_file();
+
+  printf("\nTesting get_task_log_dir()\n");
   test_get_task_log_dir();
-  printf("Finished tests\n");
+
+  printf("\nFinished tests\n");
   return 0;
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/docs/src/documentation/content/xdocs/cluster_setup.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/docs/src/documentation/content/xdocs/cluster_setup.xml?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/docs/src/documentation/content/xdocs/cluster_setup.xml (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/docs/src/documentation/content/xdocs/cluster_setup.xml Fri Mar  4 03:42:38 2011
@@ -528,10 +528,11 @@
             the tasks. For maximum security, this task controller 
             sets up restricted permissions and user/group ownership of
             local files and directories used by the tasks such as the
-            job jar files, intermediate files and task log files. Currently
-            permissions on distributed cache files are opened up to be
-            accessible by all users. In future, it is expected that stricter
-            file permissions are set for these files too.
+            job jar files, intermediate files, task log files and distributed
+            cache files. Particularly note that, because of this, except the
+            job owner and tasktracker, no other user can access any of the
+            local files/directories including those localized as part of the
+            distributed cache.
             </td>
             </tr>
             </table>

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/filecache/TaskDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/filecache/TaskDistributedCacheManager.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/filecache/TaskDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/filecache/TaskDistributedCacheManager.java Fri Mar  4 03:42:38 2011
@@ -33,6 +33,7 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalDirAllocator;
@@ -153,8 +154,18 @@ public class TaskDistributedCacheManager
       FileStatus fileStatus = fileSystem.getFileStatus(new Path(uri.getPath()));
       String cacheId = this.distributedCacheManager.makeRelative(uri, taskConf);
       String cachePath = cacheSubdir + Path.SEPARATOR + cacheId;
-      Path localPath = lDirAlloc.getLocalPathForWrite(cachePath,
-                                fileStatus.getLen(), taskConf);
+
+      // Get the local path if the cacheFile is already localized or create one
+      // if it doesn't
+      Path localPath;
+      try {
+        localPath = lDirAlloc.getLocalPathToRead(cachePath, taskConf);
+      } catch (DiskErrorException de) {
+        localPath =
+            lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(),
+                taskConf);
+      }
+
       String baseDir = localPath.toString().replace(cacheId, "");
       Path p = distributedCacheManager.getLocalCache(uri, taskConf,
           new Path(baseDir), fileStatus, 
@@ -221,7 +232,7 @@ public class TaskDistributedCacheManager
    * Creates a class loader that includes the designated
    * files and archives.
    */
-  public ClassLoader makeClassLoader(final ClassLoader parent) 
+  public ClassLoader makeClassLoader(final ClassLoader parent)
       throws MalformedURLException {
     final URL[] urls = new URL[classPaths.size()];
     for (int i = 0; i < classPaths.size(); ++i) {
@@ -231,7 +242,7 @@ public class TaskDistributedCacheManager
       @Override
       public ClassLoader run() {
         return new URLClassLoader(urls, parent);
-      }     
+      }
     });
   }
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/CleanupQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/CleanupQueue.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/CleanupQueue.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/CleanupQueue.java Fri Mar  4 03:42:38 2011
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.mapred;
 
-import java.io.IOException;
 import java.util.concurrent.LinkedBlockingQueue;
 
 import org.apache.commons.logging.Log;

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java Fri Mar  4 03:42:38 2011
@@ -36,8 +36,12 @@ import org.apache.commons.logging.LogFac
  * This class provides an implementation for launching and killing 
  * tasks that need to be run as the tasktracker itself. Hence,
  * many of the initializing or cleanup methods are not required here.
+ * 
+ * <br/>
+ * 
+ *  NOTE: This class is internal only class and not intended for users!!
  */
-class DefaultTaskController extends TaskController {
+public class DefaultTaskController extends TaskController {
 
   private static final Log LOG = 
       LogFactory.getLog(DefaultTaskController.class);
@@ -128,5 +132,15 @@ class DefaultTaskController extends Task
       }
     }
   }
+
+  @Override
+  public void initializeDistributedCache(InitializationContext context) {
+    // Do nothing.
+  }
+
+  @Override
+  public void initializeUser(InitializationContext context) {
+    // Do nothing.
+  }
   
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/InterTrackerProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/InterTrackerProtocol.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/InterTrackerProtocol.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/InterTrackerProtocol.java Fri Mar  4 03:42:38 2011
@@ -63,8 +63,9 @@ interface InterTrackerProtocol extends V
    * Version 25: JobIDs are passed in response to JobTracker restart 
    * Version 26: Added numRequiredSlots to TaskStatus for MAPREDUCE-516
    * Version 27: Adding node health status to TaskStatus for MAPREDUCE-211
+   * Version 28: Adding user name to the serialized Task for use by TT.
    */
-  public static final long versionID = 27L;
+  public static final long versionID = 28L;
   
   public final static int TRACKERS_OK = 0;
   public final static int UNKNOWN_TASKTRACKER = 1;

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/IsolationRunner.java Fri Mar  4 03:42:38 2011
@@ -183,8 +183,8 @@ public class IsolationRunner {
     // where it is.
     Path localMetaSplit = 
         new LocalDirAllocator("mapred.local.dir").getLocalPathToRead(
-            TaskTracker.getLocalSplitFile(taskId.getJobID().toString(), taskId
-                .toString()), conf);
+            TaskTracker.getLocalSplitFile(conf.getUser(), taskId.getJobID()
+                .toString(), taskId.toString()), conf);
     DataInputStream splitFile = FileSystem.getLocal(conf).open(localMetaSplit);
     TaskSplitIndex splitIndex = new TaskSplitIndex();
     splitIndex.readFields(splitFile);

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobClient.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobClient.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobClient.java Fri Mar  4 03:42:38 2011
@@ -571,8 +571,8 @@ public class JobClient extends Configure
      * set this user's id in job configuration, so later job files can be
      * accessed using this user's id
      */
-    UnixUserGroupInformation ugi = getUGI(job);
-      
+    setUGIAndUserGroupNames(job);
+
     //
     // Figure out what fs the JobTracker is using.  Copy the
     // job to it, under a temporary name.  This allows DFS to work,
@@ -665,15 +665,23 @@ public class JobClient extends Configure
       LOG.warn("No job jar file set.  User classes may not be found. "+
                "See JobConf(Class) or JobConf#setJar(String).");
     }
+  }
 
-    // Set the user's name and working directory
+  /**
+   * Set the UGI, user name and the group name for the job.
+   * 
+   * @param job
+   * @throws IOException
+   */
+  void setUGIAndUserGroupNames(JobConf job)
+      throws IOException {
+    UnixUserGroupInformation ugi = getUGI(job);
     job.setUser(ugi.getUserName());
     if (ugi.getGroupNames().length > 0) {
       job.set("group.name", ugi.getGroupNames()[0]);
     }
   }
 
-
   private UnixUserGroupInformation getUGI(Configuration job) throws IOException {
     UnixUserGroupInformation ugi = null;
     try {

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobConf.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobConf.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/JobConf.java Fri Mar  4 03:42:38 2011
@@ -145,7 +145,12 @@ public class JobConf extends Configurati
    * indicates that the options are turned off.
    */
   public static final long DISABLED_MEMORY_LIMIT = -1L;
-  
+
+  /**
+   * Property name for the configuration property mapred.local.dir
+   */
+  public static final String MAPRED_LOCAL_DIR_PROPERTY = "mapred.local.dir";
+
   /**
    * Name of the queue to which jobs will be submitted, if no queue
    * name is mentioned.
@@ -402,7 +407,7 @@ public class JobConf extends Configurati
   }
 
   public String[] getLocalDirs() throws IOException {
-    return getStrings("mapred.local.dir");
+    return getStrings(MAPRED_LOCAL_DIR_PROPERTY);
   }
 
   public void deleteLocalFiles() throws IOException {
@@ -424,7 +429,7 @@ public class JobConf extends Configurati
    * local directories.
    */
   public Path getLocalPath(String pathString) throws IOException {
-    return getLocalPath("mapred.local.dir", pathString);
+    return getLocalPath(MAPRED_LOCAL_DIR_PROPERTY, pathString);
   }
 
   /**
@@ -1562,7 +1567,7 @@ public class JobConf extends Configurati
    * <p>
    * When a job starts, a shared directory is created at location
    * <code>
-   * ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ </code>.
+   * ${mapred.local.dir}/taskTracker/$user/jobcache/$jobid/work/ </code>.
    * This directory is exposed to the users through 
    * <code>job.local.dir </code>.
    * So, the tasks can use this space 

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/LinuxTaskController.java Fri Mar  4 03:42:38 2011
@@ -80,7 +80,9 @@ class LinuxTaskController extends TaskCo
    * List of commands that the setuid script will execute.
    */
   enum TaskCommands {
+    INITIALIZE_USER,
     INITIALIZE_JOB,
+    INITIALIZE_DISTRIBUTEDCACHE,
     LAUNCH_TASK_JVM,
     INITIALIZE_TASK,
     TERMINATE_TASK_JVM,
@@ -258,8 +260,10 @@ class LinuxTaskController extends TaskCo
     String taskId = context.task.getTaskID().toString();
     for (String dir : mapredLocalDirs) {
       File mapredDir = new File(dir);
-      File taskDir = new File(mapredDir, TaskTracker.getTaskWorkDir(
-          jobId, taskId, context.task.isTaskCleanupTask())).getParentFile();
+      File taskDir =
+          new File(mapredDir, TaskTracker.getTaskWorkDir(context.task
+              .getUser(), jobId, taskId, context.task.isTaskCleanupTask()))
+              .getParentFile();
       if (directory.equals(taskDir)) {
         return dir;
       }
@@ -270,6 +274,24 @@ class LinuxTaskController extends TaskCo
                 + directory.getAbsolutePath());
   }
 
+  @Override
+  public void initializeDistributedCache(InitializationContext context)
+      throws IOException {
+    LOG.debug("Going to initialize distributed cache for " + context.user
+        + " on the TT");
+    runCommand(TaskCommands.INITIALIZE_DISTRIBUTEDCACHE, context.user,
+        new ArrayList<String>(), context.workDir, null);
+  }
+
+  @Override
+  public void initializeUser(InitializationContext context)
+      throws IOException {
+    LOG.debug("Going to initialize user directories for " + context.user
+        + " on the TT");
+    runCommand(TaskCommands.INITIALIZE_USER, context.user,
+        new ArrayList<String>(), context.workDir, null);
+  }
+
   /**
    * Builds the command line for launching/terminating/killing task JVM.
    * Following is the format for launching/terminating/killing task JVM

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/MapTask.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/MapTask.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/MapTask.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/MapTask.java Fri Mar  4 03:42:38 2011
@@ -118,8 +118,8 @@ class MapTask extends Task {
       // localize the split meta-information
       Path localSplitMeta =
         new LocalDirAllocator("mapred.local.dir").getLocalPathForWrite(
-            TaskTracker.getLocalSplitFile(getJobID().toString(), getTaskID()
-                .toString()), conf);
+            TaskTracker.getLocalSplitFile(conf.getUser(), getJobID()
+                .toString(), getTaskID().toString()), conf);
       LOG.debug("Writing local split to " + localSplitMeta);
       DataOutputStream out = FileSystem.getLocal(conf).create(localSplitMeta);
       splitMetaInfo.write(out);

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/Task.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/Task.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/Task.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/Task.java Fri Mar  4 03:42:38 2011
@@ -120,7 +120,7 @@ abstract public class Task implements Wr
   ////////////////////////////////////////////
 
   private String jobFile;                         // job configuration file
-  private String user;
+  private String user;                            // user running the job
   private TaskAttemptID taskId;                   // unique, includes job id
   private int partition;                          // id within job
   TaskStatus taskStatus;                          // current status of the task
@@ -364,7 +364,7 @@ abstract public class Task implements Wr
   boolean isMapOrReduce() {
     return !jobSetup && !jobCleanup && !taskCleanup;
   }
-  
+
   /**
    * Get the name of the user running the job/task. TaskTracker needs task's
    * user name even before it's JobConf is localized. So we explicitly serialize

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskController.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskController.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskController.java Fri Mar  4 03:42:38 2011
@@ -19,15 +19,13 @@ package org.apache.hadoop.mapred;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.mapred.JvmManager.JvmEnv;
-import org.apache.hadoop.mapred.TaskTracker.PermissionsHandler;
+import org.apache.hadoop.mapreduce.server.tasktracker.Localizer;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 
@@ -38,9 +36,13 @@ import org.apache.hadoop.util.Shell.Shel
  * This class defines the API for initializing, finalizing and cleaning
  * up of tasks, as also the launching and killing task JVMs.
  * Subclasses of this class will implement the logic required for
- * performing the actual actions. 
+ * performing the actual actions.
+ * 
+ * <br/>
+ * 
+ * NOTE: This is an internal-only class and is not intended for users!!
  */
-abstract class TaskController implements Configurable {
+public abstract class TaskController implements Configurable {
   
   private Configuration conf;
   
@@ -78,29 +80,8 @@ abstract class TaskController implements
         LOG.warn("Unable to create mapred-local directory : "
             + mapredlocalDir.getPath());
       } else {
-        PermissionsHandler.setPermissions(mapredlocalDir,
-            PermissionsHandler.sevenFiveFive);
-      }
-
-      // Set up the cache directory used for distributed cache files
-      File distributedCacheDir =
-          new File(localDir, TaskTracker.getDistributedCacheDir());
-      if (!distributedCacheDir.exists() && !distributedCacheDir.mkdirs()) {
-        LOG.warn("Unable to create cache directory : "
-            + distributedCacheDir.getPath());
-      } else {
-        PermissionsHandler.setPermissions(distributedCacheDir,
-            PermissionsHandler.sevenFiveFive);
-      }
-
-      // Set up the jobcache directory
-      File jobCacheDir = new File(localDir, TaskTracker.getJobCacheSubdir());
-      if (!jobCacheDir.exists() && !jobCacheDir.mkdirs()) {
-        LOG.warn("Unable to create job cache directory : "
-            + jobCacheDir.getPath());
-      } else {
-        PermissionsHandler.setPermissions(jobCacheDir,
-            PermissionsHandler.sevenFiveFive);
+        Localizer.PermissionsHandler.setPermissions(mapredlocalDir,
+            Localizer.PermissionsHandler.sevenFiveFive);
       }
     }
 
@@ -109,8 +90,8 @@ abstract class TaskController implements
     if (!taskLog.exists() && !taskLog.mkdirs()) {
       LOG.warn("Unable to create taskLog directory : " + taskLog.getPath());
     } else {
-      PermissionsHandler.setPermissions(taskLog,
-          PermissionsHandler.sevenFiveFive);
+      Localizer.PermissionsHandler.setPermissions(taskLog,
+          Localizer.PermissionsHandler.sevenFiveFive);
     }
   }
 
@@ -124,6 +105,17 @@ abstract class TaskController implements
   abstract void initializeJob(JobInitializationContext context) throws IOException;
 
   /**
+   * Take task-controller specific actions to initialize the distributed cache
+   * files. This involves setting appropriate permissions for these files so as
+   * to secure them to be accessible only by their owners.
+   * 
+   * @param context
+   * @throws IOException
+   */
+  public abstract void initializeDistributedCache(InitializationContext context)
+      throws IOException;
+
+  /**
    * Launch a task JVM
    * 
    * This method defines how a JVM will be launched to run a task. Each
@@ -194,10 +186,17 @@ abstract class TaskController implements
     long sleeptimeBeforeSigkill; // waiting time before sending SIGKILL to task JVM after sending SIGTERM
   }
 
-  static class JobInitializationContext {
+  /**
+   * NOTE: This is an internal-only class and is not intended for users!!
+   * 
+   */
+  public static class InitializationContext {
+    public File workDir;
+    public String user;
+  }
+
+  static class JobInitializationContext extends InitializationContext {
     JobID jobid;
-    File workDir;
-    String user;
   }
 
   /**
@@ -214,4 +213,13 @@ abstract class TaskController implements
    * @param context task context
    */
   abstract void killTask(TaskControllerContext context);
+
+  /**
+   * Initialize user on this TaskTracker in a TaskController-specific manner.
+   * 
+   * @param context
+   * @throws IOException
+   */
+  public abstract void initializeUser(InitializationContext context)
+      throws IOException;
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskRunner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskRunner.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskRunner.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskRunner.java Fri Mar  4 03:42:38 2011
@@ -35,13 +35,14 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.filecache.TaskDistributedCacheManager;
 import org.apache.hadoop.filecache.TrackerDistributedCacheManager;
+import org.apache.hadoop.mapreduce.server.tasktracker.Localizer;
 import org.apache.hadoop.fs.FSError;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.TaskTracker.PermissionsHandler;
+import org.apache.hadoop.mapred.TaskController.InitializationContext;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.mapreduce.JobContext;
@@ -161,13 +162,18 @@ abstract class TaskRunner extends Thread
       taskDistributedCacheManager = tracker.getTrackerDistributedCacheManager()
                                     .newTaskDistributedCacheManager(conf);
       taskDistributedCacheManager.setup(lDirAlloc, workDir,
-                                        TaskTracker.getDistributedCacheDir());
+                          TaskTracker.getDistributedCacheDir(conf.getUser()));
       
       // Set up the child task's configuration. After this call, no localization
       // of files should happen in the TaskTracker's process space. Any changes to
       // the conf object after this will NOT be reflected to the child.
       setupChildTaskConfiguration(lDirAlloc);
       
+      InitializationContext context = new InitializationContext();
+      context.user = conf.getUser();
+      context.workDir = new File(conf.get(TaskTracker.JOB_LOCAL_DIR));
+      tracker.getTaskController().initializeDistributedCache(context);
+
       if (!prepare()) {
         return;
       }
@@ -262,8 +268,8 @@ abstract class TaskRunner extends Thread
     if (!b) {
       LOG.warn("mkdirs failed. Ignoring");
     } else {
-      PermissionsHandler.setPermissions(logDir,
-          PermissionsHandler.sevenZeroZero);
+      Localizer.PermissionsHandler.setPermissions(logDir,
+          Localizer.PermissionsHandler.sevenZeroZero);
     }
     return logFiles;
   }
@@ -279,9 +285,9 @@ abstract class TaskRunner extends Thread
       throws IOException {
 
     Path localTaskFile =
-        lDirAlloc.getLocalPathForWrite(TaskTracker.getTaskConfFile(t
-            .getJobID().toString(), t.getTaskID().toString(), t
-            .isTaskCleanupTask()), conf);
+        lDirAlloc.getLocalPathForWrite(TaskTracker.getTaskConfFile(
+            t.getUser(), t.getJobID().toString(), t.getTaskID().toString(), t
+                .isTaskCleanupTask()), conf);
 
     // write the child's task configuration file to the local disk
     writeLocalTaskFile(localTaskFile.toString(), conf);
@@ -569,16 +575,17 @@ abstract class TaskRunner extends Thread
    * process space.
    */
   static void setupChildMapredLocalDirs(Task t, JobConf conf) {
-    String[] localDirs = conf.getStrings("mapred.local.dir");
+    String[] localDirs = conf.getStrings(JobConf.MAPRED_LOCAL_DIR_PROPERTY);
     String jobId = t.getJobID().toString();
     String taskId = t.getTaskID().toString();
     boolean isCleanup = t.isTaskCleanupTask();
+    String user = t.getUser();
     StringBuffer childMapredLocalDir =
         new StringBuffer(localDirs[0] + Path.SEPARATOR
-            + TaskTracker.getLocalTaskDir(jobId, taskId, isCleanup));
+            + TaskTracker.getLocalTaskDir(user, jobId, taskId, isCleanup));
     for (int i = 1; i < localDirs.length; i++) {
       childMapredLocalDir.append("," + localDirs[i] + Path.SEPARATOR
-          + TaskTracker.getLocalTaskDir(jobId, taskId, isCleanup));
+          + TaskTracker.getLocalTaskDir(user, jobId, taskId, isCleanup));
     }
     LOG.debug("mapred.local.dir for child : " + childMapredLocalDir);
     conf.set("mapred.local.dir", childMapredLocalDir.toString());
@@ -589,8 +596,9 @@ abstract class TaskRunner extends Thread
       TaskAttemptID task, boolean isCleanup, JobConf conf) 
       throws IOException {
     Path workDir =
-        lDirAlloc.getLocalPathToRead(TaskTracker.getTaskWorkDir(task
-            .getJobID().toString(), task.toString(), isCleanup), conf);
+        lDirAlloc.getLocalPathToRead(TaskTracker.getTaskWorkDir(
+            conf.getUser(), task.getJobID().toString(), task.toString(),
+            isCleanup), conf);
 
     return new File(workDir.toString());
   }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Fri Mar  4 03:42:38 2011
@@ -54,6 +54,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.TrackerDistributedCacheManager;
+import org.apache.hadoop.mapreduce.server.tasktracker.Localizer;
 import org.apache.hadoop.fs.DF;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -184,7 +185,7 @@ public class TaskTracker 
   
   // The filesystem where job files are stored
   FileSystem systemFS = null;
-  private FileSystem localFs = null;
+  FileSystem localFs = null;
   private final HttpServer server;
     
   volatile boolean shuttingDown = false;
@@ -222,8 +223,8 @@ public class TaskTracker 
   //for serving map output to the other nodes
 
   static Random r = new Random();
-  static final String SUBDIR = "taskTracker";
-  private static final String DISTCACHEDIR = "distcache";
+  public static final String SUBDIR = "taskTracker";
+  static final String DISTCACHEDIR = "distcache";
   static final String JOBCACHE = "jobcache";
   static final String OUTPUT = "output";
   private static final String JARSDIR = "jars";
@@ -235,6 +236,7 @@ public class TaskTracker 
 
   private JobConf fConf;
   private JobConf originalConf;
+  private Localizer localizer;
   private int maxMapSlots;
   private int maxReduceSlots;
   private int failures;
@@ -250,7 +252,7 @@ public class TaskTracker 
   
   private MapEventsFetcherThread mapEventsFetcher;
   int workerThreads;
-  private CleanupQueue directoryCleanupThread;
+  CleanupQueue directoryCleanupThread;
   volatile JvmManager jvmManager;
   
   private TaskMemoryManagerThread taskMemoryManager;
@@ -347,7 +349,6 @@ public class TaskTracker 
       shuffleMetricsRecord.update();
     }
   }
-  
 
   
   
@@ -395,7 +396,7 @@ public class TaskTracker 
         }
       }, "taskCleanup");
 
-  TaskController getTaskController() {
+  public TaskController getTaskController() {
     return taskController;
   }
   
@@ -444,72 +445,75 @@ public class TaskTracker 
     return TaskTracker.SUBDIR + Path.SEPARATOR + user;
   } 
 
-  static String getDistributedCacheDir() {
-    return TaskTracker.SUBDIR + Path.SEPARATOR + TaskTracker.DISTCACHEDIR;
+  Localizer getLocalizer() {
+    return localizer;
   }
 
-  static String getJobCacheSubdir() {
-    return TaskTracker.SUBDIR + Path.SEPARATOR + TaskTracker.JOBCACHE;
+  void setLocalizer(Localizer l) {
+    localizer = l;
   }
 
-  public static String getLocalJobDir(String user, String jobid) {
-    return getUserDir(user) + Path.SEPARATOR + getJobCacheSubdir() 
-        + Path.SEPARATOR + jobid;
-  } 
+  public static String getDistributedCacheDir(String user) {
+    return getUserDir(user) + Path.SEPARATOR + TaskTracker.DISTCACHEDIR;
+  }
 
-  static String getLocalJobDir(String jobid) {
-    return getJobCacheSubdir() + Path.SEPARATOR + jobid;
+  public static String getJobCacheSubdir(String user) {
+    return getUserDir(user) + Path.SEPARATOR + TaskTracker.JOBCACHE;
   }
 
-  static String getLocalJobConfFile(String jobid) {
-    return getLocalJobDir(jobid) + Path.SEPARATOR + TaskTracker.JOBFILE;
+  public static String getLocalJobDir(String user, String jobid) {
+    return getJobCacheSubdir(user) + Path.SEPARATOR + jobid;
   }
 
-  static String getTaskConfFile(String jobid, String taskid,
-      boolean isCleanupAttempt) {
-    return getLocalTaskDir(jobid, taskid, isCleanupAttempt) + Path.SEPARATOR
-        + TaskTracker.JOBFILE;
+  static String getLocalJobConfFile(String user, String jobid) {
+    return getLocalJobDir(user, jobid) + Path.SEPARATOR + TaskTracker.JOBFILE;
   }
 
-  static String getJobJarsDir(String jobid) {
-    return getLocalJobDir(jobid) + Path.SEPARATOR + TaskTracker.JARSDIR;
+  static String getTaskConfFile(String user, String jobid, String taskid,
+      boolean isCleanupAttempt) {
+    return getLocalTaskDir(user, jobid, taskid, isCleanupAttempt)
+    + Path.SEPARATOR + TaskTracker.JOBFILE;
   }
 
-  static String getJobJarFile(String jobid) {
-    return getJobJarsDir(jobid) + Path.SEPARATOR + "job.jar";
+  static String getJobJarsDir(String user, String jobid) {
+    return getLocalJobDir(user, jobid) + Path.SEPARATOR + TaskTracker.JARSDIR;
   }
 
-  static String getJobWorkDir(String jobid) {
-    return getLocalJobDir(jobid) + Path.SEPARATOR + MRConstants.WORKDIR;
+  static String getJobJarFile(String user, String jobid) {
+    return getJobJarsDir(user, jobid) + Path.SEPARATOR + "job.jar";
+  }
+  
+  static String getJobWorkDir(String user, String jobid) {
+    return getLocalJobDir(user, jobid) + Path.SEPARATOR + MRConstants.WORKDIR;
   }
 
-  static String getLocalSplitFile(String jobid, String taskid) {
-    return TaskTracker.getLocalTaskDir(jobid, taskid) + Path.SEPARATOR
+  static String getLocalSplitFile(String user, String jobid, String taskid) {
+    return TaskTracker.getLocalTaskDir(user, jobid, taskid) + Path.SEPARATOR
     + TaskTracker.LOCAL_SPLIT_FILE;
   }
 
-  static String getIntermediateOutputDir(String jobid, String taskid) {
-    return getLocalTaskDir(jobid, taskid) + Path.SEPARATOR
-        + TaskTracker.OUTPUT;
+  static String getIntermediateOutputDir(String user, String jobid,
+      String taskid) {
+    return getLocalTaskDir(user, jobid, taskid) + Path.SEPARATOR
+    + TaskTracker.OUTPUT;
   }
 
-  static String getLocalTaskDir(String jobid, String taskid) {
-    return getLocalTaskDir(jobid, taskid, false);
+  static String getLocalTaskDir(String user, String jobid, String taskid) {
+    return getLocalTaskDir(user, jobid, taskid, false);
   }
-
-  static String getLocalTaskDir(String jobid, String taskid,
+  
+  public static String getLocalTaskDir(String user, String jobid, String taskid,
       boolean isCleanupAttempt) {
-    String taskDir = getLocalJobDir(jobid) + Path.SEPARATOR + taskid;
+    String taskDir = getLocalJobDir(user, jobid) + Path.SEPARATOR + taskid;
     if (isCleanupAttempt) {
       taskDir = taskDir + TASK_CLEANUP_SUFFIX;
     }
     return taskDir;
   }
-
-  static String getTaskWorkDir(String jobid, String taskid,
+  
+  static String getTaskWorkDir(String user, String jobid, String taskid,
       boolean isCleanupAttempt) {
-    String dir =
-      getLocalJobDir(jobid) + Path.SEPARATOR + taskid;
+    String dir = getLocalJobDir(user, jobid) + Path.SEPARATOR + taskid;
     if (isCleanupAttempt) {
       dir = dir + TASK_CLEANUP_SUFFIX;
     }
@@ -677,7 +681,10 @@ public class TaskTracker 
     
     //setup and create jobcache directory with appropriate permissions
     taskController.setup();
-    
+
+    // create a localizer instance
+    setLocalizer(new Localizer(localFs, fConf.getLocalDirs(), taskController));
+
     //Start up node health checker service.
     if (shouldStartHealthMonitor(this.fConf)) {
       startHealthMonitor(this.fConf);
@@ -888,8 +895,11 @@ public class TaskTracker 
     Path localJarFile = null;
     Task t = tip.getTask();
     JobID jobId = t.getJobID();
-
     RunningJob rjob = addTaskToJob(jobId, tip);
+
+    // Initialize the user directories if needed.
+    getLocalizer().initializeUserDirs(t.getUser());
+
     synchronized (rjob) {
       if (!rjob.localized) {
         JobConf localJobConf = localizeJobFiles(t);
@@ -949,19 +959,19 @@ public class TaskTracker 
 
     // Initialize the job directories first
     FileSystem localFs = FileSystem.getLocal(fConf);
-    initializeJobDirs(jobId, localFs, fConf.getStrings("mapred.local.dir"));
+    getLocalizer().initializeJobDirs(userName, jobId);
 
     // Download the job.xml for this job from the system FS
     Path localJobFile =
-        localizeJobConfFile(new Path(t.getJobFile()), userFs, jobId);
+        localizeJobConfFile(new Path(t.getJobFile()), userName, userFs, jobId);
 
     JobConf localJobConf = new JobConf(localJobFile);
 
     // create the 'job-work' directory: job-specific shared directory for use as
     // scratch space by all tasks of the same job running on this TaskTracker.
     Path workDir =
-        lDirAlloc.getLocalPathForWrite(getJobWorkDir(jobId.toString()),
-          fConf);
+        lDirAlloc.getLocalPathForWrite(getJobWorkDir(userName,
+            jobId.toString()), fConf);
     if (!localFs.mkdirs(workDir)) {
       throw new IOException("Mkdirs failed to create "
           + workDir.toString());
@@ -970,154 +980,11 @@ public class TaskTracker 
     localJobConf.set(JOB_LOCAL_DIR, workDir.toUri().getPath());
 
     // Download the job.jar for this job from the system FS
-    localizeJobJarFile(jobId, userFs, localJobConf);
+    localizeJobJarFile(userName, jobId, userFs, localJobConf);
 
     return localJobConf;
   }
 
-  static class PermissionsHandler {
-    /**
-     * Permission information useful for setting permissions for a given path.
-     * Using this, one can set all possible combinations of permissions for the
-     * owner of the file. But permissions for the group and all others can only
-     * be set together, i.e. permissions for group cannot be set different from
-     * those for others and vice versa.
-     */
-    static class PermissionsInfo {
-      public boolean readPermissions;
-      public boolean writePermissions;
-      public boolean executablePermissions;
-      public boolean readPermsOwnerOnly;
-      public boolean writePermsOwnerOnly;
-      public boolean executePermsOwnerOnly;
-
-      /**
-       * Create a permissions-info object with the given attributes
-       *
-       * @param readPerms
-       * @param writePerms
-       * @param executePerms
-       * @param readOwnerOnly
-       * @param writeOwnerOnly
-       * @param executeOwnerOnly
-       */
-      public PermissionsInfo(boolean readPerms, boolean writePerms,
-          boolean executePerms, boolean readOwnerOnly, boolean writeOwnerOnly,
-          boolean executeOwnerOnly) {
-        readPermissions = readPerms;
-        writePermissions = writePerms;
-        executablePermissions = executePerms;
-        readPermsOwnerOnly = readOwnerOnly;
-        writePermsOwnerOnly = writeOwnerOnly;
-        executePermsOwnerOnly = executeOwnerOnly;
-      }
-    }
-
-    /**
-     * Set permission on the given file path using the specified permissions
-     * information. We use java api to set permission instead of spawning chmod
-     * processes. This saves a lot of time. Using this, one can set all possible
-     * combinations of permissions for the owner of the file. But permissions
-     * for the group and all others can only be set together, i.e. permissions
-     * for group cannot be set different from those for others and vice versa.
-     *
-     * This method should satisfy the needs of most of the applications. For
-     * those it doesn't, {@link FileUtil#chmod} can be used.
-     *
-     * @param f file path
-     * @param pInfo permissions information
-     * @return true if success, false otherwise
-     */
-    static boolean setPermissions(File f, PermissionsInfo pInfo) {
-      if (pInfo == null) {
-        LOG.debug(" PermissionsInfo is null, returning.");
-        return true;
-      }
-
-      LOG.debug("Setting permission for " + f.getAbsolutePath());
-
-      boolean ret = true;
-
-      // Clear all the flags
-      ret = f.setReadable(false, false) && ret;
-      ret = f.setWritable(false, false) && ret;
-      ret = f.setExecutable(false, false) && ret;
-
-      ret = f.setReadable(pInfo.readPermissions, pInfo.readPermsOwnerOnly);
-      LOG.debug("Readable status for " + f + " set to " + ret);
-      ret =
-        f.setWritable(pInfo.writePermissions, pInfo.writePermsOwnerOnly)
-        && ret;
-      LOG.debug("Writable status for " + f + " set to " + ret);
-      ret =
-        f.setExecutable(pInfo.executablePermissions,
-            pInfo.executePermsOwnerOnly)
-            && ret;
-
-      LOG.debug("Executable status for " + f + " set to " + ret);
-      return ret;
-    }
-
-    /**
-     * Permissions rwxr_xr_x
-     */
-    static PermissionsInfo sevenFiveFive =
-      new PermissionsInfo(true, true, true, false, true, false);
-    /**
-     * Completely private permissions
-     */
-    static PermissionsInfo sevenZeroZero =
-      new PermissionsInfo(true, true, true, true, true, true);
-  }
-
-  /**
-   * Prepare the job directories for a given job. To be called by the job
-   * localization code, only if the job is not already localized.
-   *
-   * <br>
-   * Here, we set 700 permissions on the job directories created on all disks.
-   * This we do so as to avoid any misuse by other users till the time
-   * {@link TaskController#initializeJob(JobInitializationContext)} is run at a
-   * later time to set proper private permissions on the job directories. <br>
-   *
-   * @param jobId
-   * @param fs
-   * @param localDirs
-   * @throws IOException
-   */
-  private static void initializeJobDirs(JobID jobId, FileSystem fs,
-      String[] localDirs)
-  throws IOException {
-    boolean initJobDirStatus = false;
-    String jobDirPath = getLocalJobDir(jobId.toString());
-    for (String localDir : localDirs) {
-      Path jobDir = new Path(localDir, jobDirPath);
-      if (fs.exists(jobDir)) {
-        // this will happen on a partial execution of localizeJob. Sometimes
-        // copying job.xml to the local disk succeeds but copying job.jar might
-        // throw out an exception. We should clean up and then try again.
-        fs.delete(jobDir, true);
-      }
-
-      boolean jobDirStatus = fs.mkdirs(jobDir);
-      if (!jobDirStatus) {
-        LOG.warn("Not able to create job directory " + jobDir.toString());
-      }
-
-      initJobDirStatus = initJobDirStatus || jobDirStatus;
-
-      // job-dir has to be private to the TT
-      PermissionsHandler.setPermissions(new File(jobDir.toUri().getPath()),
-          PermissionsHandler.sevenZeroZero);
-    }
-
-    if (!initJobDirStatus) {
-      throw new IOException("Not able to initialize job directories "
-          + "in any of the configured local directories for job "
-          + jobId.toString());
-    }
-  }
-
   /**
    * Download the job configuration file from the FS.
    *
@@ -1126,7 +993,7 @@ public class TaskTracker 
    * @return the local file system path of the downloaded file.
    * @throws IOException
    */
-  private Path localizeJobConfFile(Path jobFile, FileSystem userFs, JobID jobId)
+  private Path localizeJobConfFile(Path jobFile, String user, FileSystem userFs, JobID jobId)
   throws IOException {
     // Get sizes of JobFile and JarFile
     // sizes are -1 if they are not present.
@@ -1139,8 +1006,8 @@ public class TaskTracker 
       jobFileSize = -1;
     }
     Path localJobFile =
-      lDirAlloc.getLocalPathForWrite(getLocalJobConfFile(jobId.toString()),
-          jobFileSize, fConf);
+      lDirAlloc.getLocalPathForWrite(getLocalJobConfFile(user,
+          jobId.toString()), jobFileSize, fConf);
 
     // Download job.xml
     userFs.copyToLocalFile(jobFile, localJobFile);
@@ -1156,7 +1023,7 @@ public class TaskTracker 
    * @param localJobConf
    * @throws IOException
    */
-  private void localizeJobJarFile(JobID jobId, FileSystem userFs,
+  private void localizeJobJarFile(String user, JobID jobId, FileSystem userFs,
       JobConf localJobConf)
   throws IOException {
     // copy Jar file to the local FS and unjar it.
@@ -1171,11 +1038,11 @@ public class TaskTracker 
       } catch (FileNotFoundException fe) {
         jarFileSize = -1;
       }
-      // Here we check for and we check five times the size of jarFileSize
-      // to accommodate for unjarring the jar file in userfiles directory
+      // Here we check for five times the size of jarFileSize to accommodate for
+      // unjarring the jar file in the jars directory
       Path localJarFile =
-        lDirAlloc.getLocalPathForWrite(getJobJarFile(jobId.toString()),
-            5 * jarFileSize, fConf);
+        lDirAlloc.getLocalPathForWrite(
+            getJobJarFile(user, jobId.toString()), 5 * jarFileSize, fConf);
 
       //Download job.jar
       userFs.copyToLocalFile(jarFilePath, localJarFile);
@@ -1189,44 +1056,6 @@ public class TaskTracker 
     }
   }
 
-  /**
-   * Create taskDirs on all the disks. Otherwise, in some cases, like when
-   * LinuxTaskController is in use, child might wish to balance load across
-   * disks but cannot itself create attempt directory because of the fact that
-   * job directory is writable only by the TT.
-   *
-   * @param jobId
-   * @param attemptId
-   * @param isCleanupAttempt
-   * @param fs
-   * @param localDirs
-   * @throws IOException
-   */
-  private static void initializeAttemptDirs(String jobId, String attemptId,
-      boolean isCleanupAttempt, FileSystem fs, String[] localDirs)
-  throws IOException {
-
-    boolean initStatus = false;
-    String attemptDirPath =
-      getLocalTaskDir(jobId, attemptId, isCleanupAttempt);
-
-    for (String localDir : localDirs) {
-      Path localAttemptDir = new Path(localDir, attemptDirPath);
-
-      boolean attemptDirStatus = fs.mkdirs(localAttemptDir);
-      if (!attemptDirStatus) {
-        LOG.warn("localAttemptDir " + localAttemptDir.toString()
-            + " couldn't be created.");
-      }
-      initStatus = initStatus || attemptDirStatus;
-    }
-
-    if (!initStatus) {
-      throw new IOException("Not able to initialize attempt directories "
-          + "in any of the configured local directories for the attempt "
-          + attemptId);
-    }
-  }
   private void launchTaskForJob(TaskInProgress tip, JobConf jobConf)
       throws IOException{
     synchronized (tip) {
@@ -1266,7 +1095,7 @@ public class TaskTracker 
     }
     
     this.running = false;
-        
+
     // Clear local storage
     cleanupStorage();
         
@@ -1814,9 +1643,8 @@ public class TaskTracker 
         }
         // Delete the job directory for this  
         // task if the job is done/failed
-        if (!rjob.keepJobFiles){
-          directoryCleanupThread.addToQueue(localFs, getLocalFiles(fConf, 
-            getLocalJobDir(rjob.getJobID().toString())));
+        if (!rjob.keepJobFiles) {
+          removeJobFiles(rjob.jobConf.getUser(), rjob.getJobID().toString());
         }
         // Remove this job 
         rjob.tasks.clear();
@@ -1829,7 +1657,18 @@ public class TaskTracker 
     getJobTokenSecretManager().removeTokenForJob(jobId.toString());  
   }      
     
-    
+  /**
+   * This job's files are no longer needed on this TT, remove them.
+   * @param user the user that owns the job
+   * @param jobId the id of the job whose local files are to be removed
+   * @throws IOException
+   */
+  void removeJobFiles(String user, String jobId)
+  throws IOException {
+    directoryCleanupThread.addToQueue(localFs, getLocalFiles(fConf,
+        getLocalJobDir(user, jobId)));
+  }
+
   /**
    * Remove the tip and update all relevant state.
    * 
@@ -2282,14 +2121,14 @@ public class TaskTracker 
       FileSystem localFs = FileSystem.getLocal(fConf);
 
       // create taskDirs on all the disks.
-      initializeAttemptDirs(task.getJobID().toString(), task.getTaskID()
-          .toString(), task.isTaskCleanupTask(), localFs, fConf
-          .getStrings("mapred.local.dir"));
+      getLocalizer().initializeAttemptDirs(task.getUser(),
+          task.getJobID().toString(), task.getTaskID().toString(),
+          task.isTaskCleanupTask());
 
       // create the working-directory of the task 
       Path cwd =
-          lDirAlloc.getLocalPathForWrite(getTaskWorkDir(task.getJobID()
-              .toString(), task.getTaskID().toString(), task
+          lDirAlloc.getLocalPathForWrite(getTaskWorkDir(task.getUser(), task
+              .getJobID().toString(), task.getTaskID().toString(), task
               .isTaskCleanupTask()), defaultJobConf);
       if (!localFs.mkdirs(cwd)) {
         throw new IOException("Mkdirs failed to create " 
@@ -2351,10 +2190,14 @@ public class TaskTracker 
       return task;
     }
     
-    public TaskRunner getTaskRunner() {
+    TaskRunner getTaskRunner() {
       return runner;
     }
 
+    void setTaskRunner(TaskRunner rnr) {
+      this.runner = rnr;
+    }
+
     public synchronized void setJobConf(JobConf lconf){
       this.localJobConf = lconf;
       keepFailedTaskFiles = localJobConf.getKeepFailedTaskFiles();
@@ -2388,7 +2231,7 @@ public class TaskTracker 
         if (this.taskStatus.getRunState() == TaskStatus.State.UNASSIGNED) {
           this.taskStatus.setRunState(TaskStatus.State.RUNNING);
         }
-        this.runner = task.createRunner(TaskTracker.this, this);
+        setTaskRunner(task.createRunner(TaskTracker.this, this));
         this.runner.start();
         this.taskStatus.setStartTime(System.currentTimeMillis());
       } else {
@@ -2605,13 +2448,13 @@ public class TaskTracker 
               }
               File workDir = null;
               try {
-                workDir = new File(lDirAlloc.getLocalPathToRead(
-                                     TaskTracker.getLocalTaskDir( 
-                                       task.getJobID().toString(), 
-                                       task.getTaskID().toString(),
-                                       task.isTaskCleanupTask())
-                                     + Path.SEPARATOR + MRConstants.WORKDIR,
-                                     localJobConf). toString());
+                workDir =
+                    new File(lDirAlloc.getLocalPathToRead(
+                        TaskTracker.getLocalTaskDir(task.getUser(), task
+                            .getJobID().toString(), task.getTaskID()
+                            .toString(), task.isTaskCleanupTask())
+                            + Path.SEPARATOR + MRConstants.WORKDIR,
+                        localJobConf).toString());
               } catch (IOException e) {
                 LOG.warn("Working Directory of the task " + task.getTaskID() +
                                 " doesnt exist. Caught exception " +
@@ -2889,50 +2732,60 @@ public class TaskTracker 
         }
       }
       synchronized (this) {
+        // localJobConf is null if localization has not happened; in that
+        // case there is nothing to clean up.
+        if (localJobConf == null) {
+          return;
+        }
         try {
-          // localJobConf could be null if localization has not happened
-          // then no cleanup will be required.
-          if (localJobConf == null) {
-            return;
-          }
-          String localTaskDir =
-              getLocalTaskDir(task.getJobID().toString(), taskId.toString(),
-                  task.isTaskCleanupTask());
-          String taskWorkDir =
-              getTaskWorkDir(task.getJobID().toString(), taskId.toString(),
-                  task.isTaskCleanupTask());
-          if (needCleanup) {
-            if (runner != null) {
-              //cleans up the output directory of the task (where map outputs 
-              //and reduce inputs get stored)
-              runner.close();
-            }
-
-            if (localJobConf.getNumTasksToExecutePerJvm() == 1) {
-              // No jvm reuse, remove everything
-              directoryCleanupThread.addToQueue(localFs,
-                  getLocalFiles(defaultJobConf,
-                  localTaskDir));
-            }  
-            else {
-              // Jvm reuse. We don't delete the workdir since some other task
-              // (running in the same JVM) might be using the dir. The JVM
-              // running the tasks would clean the workdir per a task in the
-              // task process itself.
-              directoryCleanupThread.addToQueue(localFs, getLocalFiles(
-                  defaultJobConf, localTaskDir + Path.SEPARATOR
-                      + TaskTracker.JOBFILE));
-            }
-          } else {
-            if (localJobConf.getNumTasksToExecutePerJvm() == 1) {
-              directoryCleanupThread.addToQueue(localFs,
-                  getLocalFiles(defaultJobConf,
-                  taskWorkDir));
-            }  
-          }
+          removeTaskFiles(needCleanup, taskId);
         } catch (Throwable ie) {
-          LOG.info("Error cleaning up task runner: " + 
-                   StringUtils.stringifyException(ie));
+          LOG.info("Error cleaning up task runner: "
+              + StringUtils.stringifyException(ie));
+        }
+      }
+    }
+
+    /**
+     * Some or all of the files from this task are no longer required. Remove
+     * them via CleanupQueue.
+     * 
+     * @param needCleanup
+     * @param taskId
+     * @throws IOException 
+     */
+    void removeTaskFiles(boolean needCleanup, TaskAttemptID taskId)
+        throws IOException {
+      if (needCleanup) {
+        if (runner != null) {
+          // cleans up the output directory of the task (where map outputs
+          // and reduce inputs get stored)
+          runner.close();
+        }
+
+        String localTaskDir =
+            getLocalTaskDir(task.getUser(), task.getJobID().toString(), taskId
+                .toString(), task.isTaskCleanupTask());
+        if (localJobConf.getNumTasksToExecutePerJvm() == 1) {
+          // No jvm reuse, remove everything
+          directoryCleanupThread.addToQueue(localFs, getLocalFiles(
+              defaultJobConf, localTaskDir));
+        } else {
+          // Jvm reuse. We don't delete the workdir since some other task
+          // (running in the same JVM) might be using the dir. The JVM
+          // running the tasks would clean the workdir per a task in the
+          // task process itself.
+          directoryCleanupThread.addToQueue(localFs, getLocalFiles(
+              defaultJobConf, localTaskDir + Path.SEPARATOR
+                  + TaskTracker.JOBFILE));
+        }
+      } else {
+        if (localJobConf.getNumTasksToExecutePerJvm() == 1) {
+          String taskWorkDir =
+              getTaskWorkDir(task.getUser(), task.getJobID().toString(),
+                  taskId.toString(), task.isTaskCleanupTask());
+          directoryCleanupThread.addToQueue(localFs, getLocalFiles(
+              defaultJobConf, taskWorkDir));
         }
       }
     }
@@ -3371,15 +3224,25 @@ public class TaskTracker 
         FileSystem rfs = ((LocalFileSystem)
             context.getAttribute("local.file.system")).getRaw();
 
-        // Index file
-        Path indexFileName = lDirAlloc.getLocalPathToRead(
-            TaskTracker.getIntermediateOutputDir(jobId, mapId)
-            + "/file.out.index", conf);
-        
-        // Map-output file
-        Path mapOutputFileName = lDirAlloc.getLocalPathToRead(
-            TaskTracker.getIntermediateOutputDir(jobId, mapId)
-            + "/file.out", conf);
+      String userName = null;
+      synchronized (tracker.runningJobs) {
+        RunningJob rjob = tracker.runningJobs.get(JobID.forName(jobId));
+        if (rjob == null) {
+          throw new IOException("Unknown job " + jobId + "!!");
+        }
+        userName = rjob.jobConf.getUser();
+      }
+      // Index file
+      Path indexFileName =
+          lDirAlloc.getLocalPathToRead(TaskTracker.getIntermediateOutputDir(
+              userName, jobId, mapId)
+              + "/file.out.index", conf);
+
+      // Map-output file
+      Path mapOutputFileName =
+          lDirAlloc.getLocalPathToRead(TaskTracker.getIntermediateOutputDir(
+              userName, jobId, mapId)
+              + "/file.out", conf);
 
         /**
          * Read the index file to get the information about where

Modified: hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/pipes/Application.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/pipes/Application.java?rev=1077116&r1=1077115&r2=1077116&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/pipes/Application.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/mapred/org/apache/hadoop/mapred/pipes/Application.java Fri Mar  4 03:42:38 2011
@@ -89,7 +89,11 @@ class Application<K1 extends WritableCom
     }
 
     String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
-    FileUtil.chmod(executable, "a+x");
+    if (!new File(executable).canExecute()) {
+      // LinuxTaskController sets +x permissions on all distcache files already.
+      // In case of DefaultTaskController, set permissions here.
+      FileUtil.chmod(executable, "u+x");
+    }
     cmd.add(executable);
     // wrap the command in a stdout/stderr capture
     TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));



Mime
View raw message