hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jh...@apache.org
Subject [hadoop] 04/04: YARN-9187. Backport YARN-6852 for GPU-specific native changes to branch-2
Date Thu, 24 Jan 2019 18:55:23 GMT
This is an automated email from the ASF dual-hosted git repository.

jhung pushed a commit to branch YARN-8200
in repository https://gitbox.apache.org/repos/asf/hadoop.git

commit 7da0c984625241c959a76e2efc150e37edf3f903
Author: Jonathan Hung <jhung@linkedin.com>
AuthorDate: Wed Jan 9 16:21:43 2019 -0500

    YARN-9187. Backport YARN-6852 for GPU-specific native changes to branch-2
---
 .../src/CMakeLists.txt                             |   8 +-
 .../container-executor/impl/container-executor.h   |   2 +
 .../src/main/native/container-executor/impl/main.c |  11 +
 .../impl/modules/cgroups/cgroups-operations.c      | 161 +++++++++++++++
 .../impl/modules/cgroups/cgroups-operations.h      |  55 +++++
 .../impl/modules/gpu/gpu-module.c                  | 229 +++++++++++++++++++++
 .../impl/modules/gpu/gpu-module.h                  |  45 ++++
 .../test/modules/cgroups/test-cgroups-module.cc    | 121 +++++++++++
 .../test/modules/gpu/test-gpu-module.cc            | 203 ++++++++++++++++++
 .../test/test-container-executor.c                 |   1 -
 10 files changed, 833 insertions(+), 3 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
index 0b1c3e9..e9f8aff 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
@@ -101,9 +101,11 @@ add_library(container
     main/native/container-executor/impl/container-executor.c
     main/native/container-executor/impl/get_executable.c
     main/native/container-executor/impl/utils/string-utils.c
+    main/native/container-executor/impl/utils/docker-util.c
     main/native/container-executor/impl/utils/path-utils.c
+    main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
     main/native/container-executor/impl/modules/common/module-configs.c
-    main/native/container-executor/impl/utils/docker-util.c
+    main/native/container-executor/impl/modules/gpu/gpu-module.c
 )
 
 add_executable(container-executor
@@ -135,6 +137,8 @@ add_executable(cetest
         main/native/container-executor/test/utils/test-string-utils.cc
         main/native/container-executor/test/utils/test-path-utils.cc
         main/native/container-executor/test/test_util.cc
-        main/native/container-executor/test/utils/test_docker_util.cc)
+        main/native/container-executor/test/utils/test_docker_util.cc
+        main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
+        main/native/container-executor/test/modules/gpu/test-gpu-module.cc)
 target_link_libraries(cetest gtest container)
 output_directory(cetest test)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
index 956b38c..a78b077 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
@@ -285,3 +285,5 @@ int execute_regex_match(const char *regex_str, const char *input);
  * Return 0 on success.
  */
 int validate_docker_image_name(const char *image_name);
+
+struct configuration* get_cfg();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
index 930dabe..9cf34a0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
@@ -22,6 +22,8 @@
 #include "util.h"
 #include "get_executable.h"
 #include "utils/string-utils.h"
+#include "modules/gpu/gpu-module.h"
+#include "modules/cgroups/cgroups-operations.h"
 
 #include <errno.h>
 #include <grp.h>
@@ -241,6 +243,14 @@ static int validate_arguments(int argc, char **argv , int *operation)
{
     return INVALID_ARGUMENT_NUMBER;
   }
 
+  /*
+   * Check if it is a known module, if yes, redirect to module
+   */
+  if (strcmp("--module-gpu", argv[1]) == 0) {
+    return handle_gpu_request(&update_cgroups_parameters, "gpu", argc - 1,
+           &argv[1]);
+  }
+
   if (strcmp("--checksetup", argv[1]) == 0) {
     *operation = CHECK_SETUP;
     return 0;
@@ -325,6 +335,7 @@ static int validate_arguments(int argc, char **argv , int *operation)
{
         return FEATURE_DISABLED;
     }
   }
+
   /* Now we have to validate 'run as user' operations that don't use
     a 'long option' - we should fix this at some point. The validation/argument
     parsing here is extensive enough that it done in a separate function */
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
new file mode 100644
index 0000000..b234109
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "container-executor.h"
+#include "utils/string-utils.h"
+#include "utils/path-utils.h"
+#include "modules/common/module-configs.h"
+#include "modules/common/constants.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "util.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#define MAX_PATH_LEN 4096
+
+static const struct section* cgroup_cfg_section = NULL;
+
+void reload_cgroups_configuration() {
+  cgroup_cfg_section = get_configuration_section(CGROUPS_SECTION_NAME, get_cfg());
+}
+
+char* get_cgroups_path_to_write(
+    const char* hierarchy_name,
+    const char* param_name,
+    const char* group_id) {
+  int failed = 0;
+  char* buffer = NULL;
+  const char* cgroups_root = get_section_value(CGROUPS_ROOT_KEY,
+     cgroup_cfg_section);
+  const char* yarn_hierarchy_name = get_section_value(
+     CGROUPS_YARN_HIERARCHY_KEY, cgroup_cfg_section);
+
+  // Make sure it is defined.
+  if (!cgroups_root || cgroups_root[0] == 0) {
+    fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
+      CGROUPS_ROOT_KEY);
+    failed = 1;
+    goto cleanup;
+  }
+
+  // Make sure it is defined.
+  if (!yarn_hierarchy_name || yarn_hierarchy_name[0] == 0) {
+    fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
+      CGROUPS_YARN_HIERARCHY_KEY);
+    failed = 1;
+    goto cleanup;
+  }
+
+  buffer = malloc(MAX_PATH_LEN + 1);
+  if (!buffer) {
+    fprintf(ERRORFILE, "Failed to allocate memory for output path.\n");
+    failed = 1;
+    goto cleanup;
+  }
+
+  // Make a path.
+  // CGroups path should not be too long.
+  if (snprintf(buffer, MAX_PATH_LEN, "%s/%s/%s/%s/%s.%s",
+    cgroups_root, hierarchy_name, yarn_hierarchy_name,
+    group_id, hierarchy_name, param_name) < 0) {
+    fprintf(ERRORFILE, "Failed to print output path.\n");
+    failed = 1;
+    goto cleanup;
+  }
+
+cleanup:
+  if (failed) {
+    if (buffer) {
+      free(buffer);
+    }
+    return NULL;
+  }
+  return buffer;
+}
+
+int update_cgroups_parameters(
+   const char* hierarchy_name,
+   const char* param_name,
+   const char* group_id,
+   const char* value) {
+#ifndef __linux
+  fprintf(ERRORFILE, "Failed to update cgroups parameters, not supported\n");
+  return -1;
+#endif
+  int failure = 0;
+
+  if (!cgroup_cfg_section) {
+    reload_cgroups_configuration();
+  }
+
+  char* full_path = get_cgroups_path_to_write(hierarchy_name, param_name,
+    group_id);
+
+  if (!full_path) {
+    fprintf(ERRORFILE,
+      "Failed to get cgroups path to write, it should be a configuration issue");
+    failure = 1;
+    goto cleanup;
+  }
+
+  if (!verify_path_safety(full_path)) {
+    failure = 1;
+    goto cleanup;
+  }
+
+  // Make sure file exists
+  struct stat sb;
+  if (stat(full_path, &sb) != 0) {
+    fprintf(ERRORFILE, "CGroups: Could not find file to write, %s", full_path);
+    failure = 1;
+    goto cleanup;
+  }
+
+  fprintf(ERRORFILE, "CGroups: Updating cgroups, path=%s, value=%s",
+    full_path, value);
+
+  // Write values to file
+  FILE *f;
+  f = fopen(full_path, "a");
+  if (!f) {
+    fprintf(ERRORFILE, "CGroups: Failed to open cgroups file, %s", full_path);
+    failure = 1;
+    goto cleanup;
+  }
+  if (fprintf(f, "%s", value) < 0) {
+    fprintf(ERRORFILE, "CGroups: Failed to write cgroups file, %s", full_path);
+    fclose(f);
+    failure = 1;
+    goto cleanup;
+  }
+  if (fclose(f) != 0) {
+    fprintf(ERRORFILE, "CGroups: Failed to close cgroups file, %s", full_path);
+    failure = 1;
+    goto cleanup;
+  }
+
+cleanup:
+  if (full_path) {
+    free(full_path);
+  }
+  return -failure;
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
new file mode 100644
index 0000000..cf80bcf
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CGROUPS_OPERATIONS_H_
+#define _CGROUPS_OPERATIONS_H_
+
+#define CGROUPS_SECTION_NAME "cgroups"
+#define CGROUPS_ROOT_KEY "root"
+#define CGROUPS_YARN_HIERARCHY_KEY "yarn-hierarchy"
+
+/**
+ * Handle update CGroups parameter update requests:
+ * - hierarchy_name: e.g. devices / cpu,cpuacct
+ * - param_name: e.g. deny
+ * - group_id: e.g. container_x_y
+ * - value: e.g. "a *:* rwm"
+ *
+ * return 0 if succeeded
+ */
+int update_cgroups_parameters(
+   const char* hierarchy_name,
+   const char* param_name,
+   const char* group_id,
+   const char* value);
+
+ /**
+  * Get CGroups path to update. Visible for testing.
+  * Return 0 if succeeded
+  */
+ char* get_cgroups_path_to_write(
+    const char* hierarchy_name,
+    const char* param_name,
+    const char* group_id);
+
+ /**
+  * Reload config from filesystem, visible for testing.
+  */
+ void reload_cgroups_configuration();
+
+#endif
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
new file mode 100644
index 0000000..f96645d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "container-executor.h"
+#include "utils/string-utils.h"
+#include "modules/gpu/gpu-module.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "modules/common/module-configs.h"
+#include "modules/common/constants.h"
+#include "util.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <unistd.h>
+
+#define EXCLUDED_GPUS_OPTION "excluded_gpus"
+#define CONTAINER_ID_OPTION "container_id"
+#define DEFAULT_NVIDIA_MAJOR_NUMBER 195
+#define MAX_CONTAINER_ID_LEN 128
+
+static const struct section* cfg_section;
+
+static int internal_handle_gpu_request(
+    update_cgroups_parameters_func update_cgroups_parameters_func_p,
+    size_t n_minor_devices_to_block, int minor_devices[],
+    const char* container_id) {
+  char* allowed_minor_numbers_str = NULL;
+  int* allowed_minor_numbers = NULL;
+  size_t n_allowed_minor_numbers = 0;
+  int return_code = 0;
+
+  if (n_minor_devices_to_block == 0) {
+    // no device to block, just return;
+    return 0;
+  }
+
+  // Get major device number from cfg, if not set, major number of (Nvidia)
+  // will be the default value.
+  int major_device_number;
+  char* major_number_str = get_section_value(GPU_MAJOR_NUMBER_CONFIG_KEY,
+     cfg_section);
+  if (!major_number_str || 0 == major_number_str[0]) {
+    // Default major number of Nvidia devices
+    major_device_number = DEFAULT_NVIDIA_MAJOR_NUMBER;
+  } else {
+    major_device_number = strtol(major_number_str, NULL, 0);
+  }
+
+  // Get allowed minor device numbers from cfg, if not set, means all minor
+  // devices can be used by YARN
+  allowed_minor_numbers_str = get_section_value(
+      GPU_ALLOWED_DEVICES_MINOR_NUMBERS,
+      cfg_section);
+  if (!allowed_minor_numbers_str || 0 == allowed_minor_numbers_str[0]) {
+    allowed_minor_numbers = NULL;
+  } else {
+    int rc = get_numbers_split_by_comma(allowed_minor_numbers_str,
+                                        &allowed_minor_numbers,
+                                        &n_allowed_minor_numbers);
+    if (0 != rc) {
+      fprintf(ERRORFILE,
+          "Failed to get allowed minor device numbers from cfg, value=%s\n",
+          allowed_minor_numbers_str);
+      return_code = -1;
+      goto cleanup;
+    }
+
+    // Make sure we're trying to black devices allowed in config
+    for (int i = 0; i < n_minor_devices_to_block; i++) {
+      int found = 0;
+      for (int j = 0; j < n_allowed_minor_numbers; j++) {
+        if (minor_devices[i] == allowed_minor_numbers[j]) {
+          found = 1;
+          break;
+        }
+      }
+
+      if (!found) {
+        fprintf(ERRORFILE,
+          "Trying to blacklist device with minor-number=%d which is not on allowed list\n",
+          minor_devices[i]);
+        return_code = -1;
+        goto cleanup;
+      }
+    }
+  }
+
+  // Use cgroup helpers to blacklist devices
+  for (int i = 0; i < n_minor_devices_to_block; i++) {
+    char param_value[128];
+    memset(param_value, 0, sizeof(param_value));
+    snprintf(param_value, sizeof(param_value), "c %d:%d rwm",
+             major_device_number, i);
+
+    int rc = update_cgroups_parameters_func_p("devices", "deny",
+      container_id, param_value);
+
+    if (0 != rc) {
+      fprintf(ERRORFILE, "CGroups: Failed to update cgroups\n");
+      return_code = -1;
+      goto cleanup;
+    }
+  }
+
+cleanup:
+  if (major_number_str) {
+    free(major_number_str);
+  }
+  if (allowed_minor_numbers) {
+    free(allowed_minor_numbers);
+  }
+  if (allowed_minor_numbers_str) {
+    free(allowed_minor_numbers_str);
+  }
+
+  return return_code;
+}
+
+void reload_gpu_configuration() {
+  cfg_section = get_configuration_section(GPU_MODULE_SECTION_NAME, get_cfg());
+}
+
+/*
+ * Format of GPU request commandline:
+ *
+ * c-e gpu --excluded_gpus 0,1,3 --container_id container_x_y
+ */
+int handle_gpu_request(update_cgroups_parameters_func func,
+    const char* module_name, int module_argc, char** module_argv) {
+  if (!cfg_section) {
+    reload_gpu_configuration();
+  }
+
+  if (!module_enabled(cfg_section, GPU_MODULE_SECTION_NAME)) {
+    fprintf(ERRORFILE,
+      "Please make sure gpu module is enabled before using it.\n");
+    return -1;
+  }
+
+  static struct option long_options[] = {
+    {EXCLUDED_GPUS_OPTION, required_argument, 0, 'e' },
+    {CONTAINER_ID_OPTION, required_argument, 0, 'c' },
+    {0, 0, 0, 0}
+  };
+
+  int rc = 0;
+  int c = 0;
+  int option_index = 0;
+
+  int* minor_devices = NULL;
+  char container_id[MAX_CONTAINER_ID_LEN];
+  memset(container_id, 0, sizeof(container_id));
+  size_t n_minor_devices_to_block = 0;
+  int failed = 0;
+
+  optind = 1;
+  while((c = getopt_long(module_argc, module_argv, "e:c:",
+                         long_options, &option_index)) != -1) {
+    switch(c) {
+      case 'e':
+        rc = get_numbers_split_by_comma(optarg, &minor_devices,
+          &n_minor_devices_to_block);
+        if (0 != rc) {
+          fprintf(ERRORFILE,
+            "Failed to get minor devices number from command line, value=%s\n",
+            optarg);
+          failed = 1;
+          goto cleanup;
+        }
+        break;
+      case 'c':
+        if (!validate_container_id(optarg)) {
+          fprintf(ERRORFILE,
+            "Specified container_id=%s is invalid\n", optarg);
+          failed = 1;
+          goto cleanup;
+        }
+        strncpy(container_id, optarg, MAX_CONTAINER_ID_LEN);
+        break;
+      default:
+        fprintf(ERRORFILE,
+          "Unknown option in gpu command character %d %c, optionindex = %d\n",
+          c, c, optind);
+        failed = 1;
+        goto cleanup;
+    }
+  }
+
+  if (0 == container_id[0]) {
+    fprintf(ERRORFILE,
+      "[%s] --container_id must be specified.\n", __func__);
+    failed = 1;
+    goto cleanup;
+  }
+
+  if (!minor_devices) {
+     // Minor devices is null, skip following call.
+     fprintf(ERRORFILE, "is not specified, skip cgroups call.\n");
+     goto cleanup;
+  }
+
+  failed = internal_handle_gpu_request(func, n_minor_devices_to_block,
+         minor_devices,
+         container_id);
+
+cleanup:
+  if (minor_devices) {
+    free(minor_devices);
+  }
+  return failed;
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
new file mode 100644
index 0000000..59d4c7e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __FreeBSD__
+#define _WITH_GETLINE
+#endif
+
+#ifndef _MODULES_GPU_GPU_MUDULE_H_
+#define _MODULES_GPU_GPU_MUDULE_H_
+
+#define GPU_MAJOR_NUMBER_CONFIG_KEY "gpu.major-device-number"
+#define GPU_ALLOWED_DEVICES_MINOR_NUMBERS "gpu.allowed-device-minor-numbers"
+#define GPU_MODULE_SECTION_NAME "gpu"
+
+// For unit test stubbing
+typedef int (*update_cgroups_parameters_func)(const char*, const char*,
+   const char*, const char*);
+
+/**
+ * Handle gpu requests
+ */
+int handle_gpu_request(update_cgroups_parameters_func func,
+   const char* module_name, int module_argc, char** module_argv);
+
+/**
+ * Reload config from filesystem, visible for testing.
+ */
+void reload_gpu_configuration();
+
+#endif
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
new file mode 100644
index 0000000..8ffbe88
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <gtest/gtest.h>
+#include <sstream>
+
+extern "C" {
+#include "configuration.h"
+#include "container-executor.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "test/test-container-executor-common.h"
+#include "util.h"
+}
+
+namespace ContainerExecutor {
+
+class TestCGroupsModule : public ::testing::Test {
+protected:
+  virtual void SetUp() {
+    if (mkdirs(TEST_ROOT, 0755) != 0) {
+      fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
+      exit(1);
+    }
+    LOGFILE = stdout;
+    ERRORFILE = stderr;
+  }
+
+  virtual void TearDown() {}
+};
+
+TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_root) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
+  FILE *file = fopen(filename, "w");
+  if (file == NULL) {
+    printf("FAIL: Could not open configuration file: %s\n", filename);
+    exit(1);
+  }
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "yarn-hierarchy=yarn\n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+
+  ASSERT_TRUE(NULL == path) << "Should fail.\n";
+}
+
+TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_yarn_hierarchy) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
+  FILE *file = fopen(filename, "w");
+
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file: " << filename
+                    << "\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "root=/sys/fs/cgroups\n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+
+  ASSERT_TRUE(NULL == path) << "Should fail.\n";
+}
+
+TEST_F(TestCGroupsModule, test_cgroups_get_path_succeeded) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path.cfg";
+  FILE *file = fopen(filename, "w");
+
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "root=/sys/fs/cgroups \n");
+  fprintf(file, "yarn-hierarchy=yarn \n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+  ASSERT_TRUE(NULL != path) << "Should success.\n";
+
+  const char *EXPECTED =
+      "/sys/fs/cgroups/devices/yarn/container_1/devices.deny";
+
+  ASSERT_STREQ(EXPECTED, path)
+      << "Return cgroup-path-to-write is not expected\n";
+}
+} // namespace ContainerExecutor
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
new file mode 100644
index 0000000..7e41fb4
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <gtest/gtest.h>
+#include <sstream>
+
+extern "C" {
+#include "configuration.h"
+#include "container-executor.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "modules/gpu/gpu-module.h"
+#include "test/test-container-executor-common.h"
+#include "util.h"
+}
+
+namespace ContainerExecutor {
+
+class TestGpuModule : public ::testing::Test {
+protected:
+  virtual void SetUp() {
+    if (mkdirs(TEST_ROOT, 0755) != 0) {
+      fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
+      exit(1);
+    }
+    LOGFILE = stdout;
+    ERRORFILE = stderr;
+  }
+
+  virtual void TearDown() {
+
+  }
+};
+
+static std::vector<const char*> cgroups_parameters_invoked;
+
+static int mock_update_cgroups_parameters(
+   const char* controller_name,
+   const char* param_name,
+   const char* group_id,
+   const char* value) {
+  char* buf = (char*) malloc(128);
+  strcpy(buf, controller_name);
+  cgroups_parameters_invoked.push_back(buf);
+
+  buf = (char*) malloc(128);
+  strcpy(buf, param_name);
+  cgroups_parameters_invoked.push_back(buf);
+
+  buf = (char*) malloc(128);
+  strcpy(buf, group_id);
+  cgroups_parameters_invoked.push_back(buf);
+
+  buf = (char*) malloc(128);
+  strcpy(buf, value);
+  cgroups_parameters_invoked.push_back(buf);
+  return 0;
+}
+
+static void verify_param_updated_to_cgroups(
+    int argc, const char** argv) {
+  ASSERT_EQ(argc, cgroups_parameters_invoked.size());
+
+  int offset = 0;
+  while (offset < argc) {
+    ASSERT_STREQ(argv[offset], cgroups_parameters_invoked[offset]);
+    offset++;
+  }
+}
+
+static void write_and_load_gpu_module_to_cfg(const char* cfg_filepath, int enabled) {
+  FILE *file = fopen(cfg_filepath, "w");
+  if (file == NULL) {
+    printf("FAIL: Could not open configuration file: %s\n", cfg_filepath);
+    exit(1);
+  }
+  fprintf(file, "[gpu]\n");
+  if (enabled) {
+    fprintf(file, "module.enabled=true\n");
+  } else {
+    fprintf(file, "module.enabled=false\n");
+  }
+  fclose(file);
+
+  // Read config file
+  read_executor_config(cfg_filepath);
+  reload_gpu_configuration();
+}
+
+static void test_gpu_module_enabled_disabled(int enabled) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_module_enabled_disabled.cfg";
+  write_and_load_gpu_module_to_cfg(filename, enabled);
+
+  char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                   (char*) "--container_id",
+                   (char*) "container_1498064906505_0001_01_000001" };
+
+  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+              "gpu", 5, argv);
+
+  int EXPECTED_RC;
+  if (enabled) {
+    EXPECTED_RC = 0;
+  } else {
+    EXPECTED_RC = -1;
+  }
+  ASSERT_EQ(EXPECTED_RC, rc);
+}
+
+TEST_F(TestGpuModule, test_verify_gpu_module_calls_cgroup_parameter) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_verify_gpu_module_calls_cgroup_parameter.cfg";
+  write_and_load_gpu_module_to_cfg(filename, 1);
+
+  char* container_id = (char*) "container_1498064906505_0001_01_000001";
+  char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                   (char*) "--container_id",
+                   container_id };
+
+  /* Test case 1: block 2 devices */
+  cgroups_parameters_invoked.clear();
+  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 5, argv);
+  ASSERT_EQ(0, rc) << "Should success.\n";
+
+  // Verify cgroups parameters
+  const char* expected_cgroups_argv[] = { "devices", "deny", container_id, "c 195:0 rwm",
+    "devices", "deny", container_id, "c 195:1 rwm"};
+  verify_param_updated_to_cgroups(8, expected_cgroups_argv);
+
+  /* Test case 2: block 0 devices */
+  cgroups_parameters_invoked.clear();
+  char* argv_1[] = { (char*) "--module-gpu", (char*) "--container_id", container_id };
+  rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 3, argv_1);
+  ASSERT_EQ(0, rc) << "Should success.\n";
+
+  // Verify cgroups parameters
+  verify_param_updated_to_cgroups(0, NULL);
+}
+
+TEST_F(TestGpuModule, test_illegal_cli_parameters) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_illegal_cli_parameters.cfg";
+  write_and_load_gpu_module_to_cfg(filename, 1);
+
+  // Illegal container id - 1
+  char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                   (char*) "--container_id", (char*) "xxxx" };
+  int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 5, argv);
+  ASSERT_NE(0, rc) << "Should fail.\n";
+
+  // Illegal container id - 2
+  char* argv_1[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                   (char*) "--container_id", (char*) "container_1" };
+  rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 5, argv_1);
+  ASSERT_NE(0, rc) << "Should fail.\n";
+
+  // Illegal container id - 3
+  char* argv_2[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1" };
+  rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 3, argv_2);
+  ASSERT_NE(0, rc) << "Should fail.\n";
+}
+
+TEST_F(TestGpuModule, test_gpu_module_disabled) {
+  test_gpu_module_enabled_disabled(0);
+}
+
+TEST_F(TestGpuModule, test_gpu_module_enabled) {
+  test_gpu_module_enabled_disabled(1);
+}
+} // namespace ContainerExecutor
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
index 9e85b3f..235ea77 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
@@ -1392,7 +1392,6 @@ int main(int argc, char **argv) {
 #endif
 
   test_trim_function();
-  run("rm -fr " TEST_ROOT);
   printf("\nFinished tests\n");
 
   free(current_username);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message