mesos-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Benjamin Mahler <bmah...@apache.org>
Subject Re: Review Request 48367: Added test to verify that GPU auto-discovery works.
Date Thu, 16 Jun 2016 23:25:11 GMT

-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/48367/#review138110
-----------------------------------------------------------


Ship it!




Thanks for the tests! Here is the diff I applied before committing:

```
diff --git a/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp b/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
index ffe1f57..e06d107 100644
--- a/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
+++ b/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
@@ -228,36 +228,46 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_FractionalResources)
 }
 
 
-// Test proper enumeration of available GPU devices.
-TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyResources)
+// Ensures that GPUs can be auto-discovered.
+TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_Discovery)
 {
   ASSERT_TRUE(nvml::isAvailable());
   ASSERT_SOME(nvml::initialize());
 
-  // Get the number of GPUs actually on this machine using NVML.
   Try<unsigned int> gpus = nvml::deviceGetCount();
   ASSERT_SOME(gpus);
 
-  // Set the `gpus` resource flag to 0.
   slave::Flags flags = CreateSlaveFlags();
-  flags.resources = "gpus:0";
+  flags.resources = "cpus:1"; // To override the default with gpus:0.
 
   Try<Resources> resources = Containerizer::resources(flags);
 
   ASSERT_SOME(resources);
-  ASSERT_NONE(resources->gpus());
+  ASSERT_SOME(resources->gpus());
+  ASSERT_EQ(gpus.get(), resources->gpus().get());
+}
 
-  // Don't set either `nvidia_gpu_devices` or the `gpus` resource flag.
-  flags = CreateSlaveFlags();
-  flags.resources = "cpus:1"; // To override the default with gpus:0.
 
-  resources = Containerizer::resources(flags);
+// Ensures that the --resources and --nvidia_gpu_devices
+// flags are correctly validated.
+TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_FlagValidation)
+{
+  ASSERT_TRUE(nvml::isAvailable());
+  ASSERT_SOME(nvml::initialize());
+
+  Try<unsigned int> gpus = nvml::deviceGetCount();
+  ASSERT_SOME(gpus);
+
+  // Setting the GPUs to 0 should not trigger auto-discovery!
+  slave::Flags flags = CreateSlaveFlags();
+  flags.resources = "gpus:0";
+
+  Try<Resources> resources = Containerizer::resources(flags);
 
   ASSERT_SOME(resources);
-  ASSERT_SOME(resources->gpus());
-  ASSERT_EQ(gpus.get(), resources->gpus().get());
+  ASSERT_NONE(resources->gpus());
 
-  // Set both the `nvidia_gpu_devices` and `gpus` resource flags.
+  // --nvidia_gpu_devices and --resources agree on the number of GPUs.
   flags = CreateSlaveFlags();
   flags.nvidia_gpu_devices = vector<unsigned int>({0u});
   flags.resources = "gpus:1";
@@ -268,7 +278,7 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyResources)
   ASSERT_SOME(resources->gpus());
   ASSERT_EQ(1u, resources->gpus().get());
 
-  // Set `nvidia_gpu_devices` but don't set the `gpus` resource flag.
+  // Both --resources and --nvidia_gpu_devices must be specified!
   flags = CreateSlaveFlags();
   flags.nvidia_gpu_devices = vector<unsigned int>({0u});
   flags.resources = "cpus:1"; // To override the default with gpus:0.
@@ -277,7 +287,6 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyResources)
 
   ASSERT_ERROR(resources);
 
-  // Don't set `nvidia_gpu_devices` but do set the `gpus` resource flag.
   flags = CreateSlaveFlags();
   flags.resources = "gpus:" + stringify(gpus.get());
 
@@ -285,8 +294,7 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyResources)
 
   ASSERT_ERROR(resources);
 
-  // Set `nvidia_gpu_devices` and the `gpus`
-  // resource flags to conflicting values.
+  // --nvidia_gpu_devices and --resources do not match!
   flags = CreateSlaveFlags();
   flags.nvidia_gpu_devices = vector<unsigned int>({0u});
   flags.resources = "gpus:2";
@@ -295,7 +303,6 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyResources)
 
   ASSERT_ERROR(resources);
 
-  // Set `nvidia_gpu_devices` when the `gpus` resource is 0.
   flags = CreateSlaveFlags();
   flags.nvidia_gpu_devices = vector<unsigned int>({0u});
   flags.resources = "gpus:0";
@@ -304,18 +311,23 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyResources)
 
   ASSERT_ERROR(resources);
 
-  // Set the `gpus` resource flag to 1000000.
+  // More than available on the machine!
   flags = CreateSlaveFlags();
-  flags.resources = "gpus:1000000";
+  flags.resources = "gpus:" + stringify(2 * gpus.get());
+  flags.nvidia_gpu_devices = vector<unsigned int>();
+
+  for (size_t i = 0; i < 2 * gpus.get(); ++i) {
+    flags.nvidia_gpu_devices->push_back(i);
+  }
 
   resources = Containerizer::resources(flags);
 
   ASSERT_ERROR(resources);
 
-  // Set `nvidia_gpu_devices` to contain repeated values.
+  // Set `nvidia_gpu_devices` to contains duplicates.
   flags = CreateSlaveFlags();
   flags.nvidia_gpu_devices = vector<unsigned int>({0u, 0u});
-  flags.resources = "cpus:1"; // To override the default with gpus:0.
+  flags.resources = "cpus:1;gpus:1";
 
   resources = Containerizer::resources(flags);
```


src/tests/containerizer/nvidia_gpu_isolator_tests.cpp (lines 231 - 232)
<https://reviews.apache.org/r/48367/#comment203325>

    Let's break this test apart so that it's a bit clearer what is being tested.
    
    For example:
    
    AutoDiscovery
    FlagValidation


- Benjamin Mahler


On June 11, 2016, 3:06 a.m., Kevin Klues wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/48367/
> -----------------------------------------------------------
> 
> (Updated June 11, 2016, 3:06 a.m.)
> 
> 
> Review request for mesos and Benjamin Mahler.
> 
> 
> Bugs: MESOS-5257
>     https://issues.apache.org/jira/browse/MESOS-5257
> 
> 
> Repository: mesos
> 
> 
> Description
> -------
> 
> Added test to verify that GPU auto-discovery works.
> 
> 
> Diffs
> -----
> 
>   src/tests/containerizer/nvidia_gpu_isolator_tests.cpp 65ffbafffeb5dac372b5770d2a1560a942a822e2

> 
> Diff: https://reviews.apache.org/r/48367/diff/
> 
> 
> Testing
> -------
> 
> GTEST_FILTER="" make -j check && sudo GTEST_FILTER="*NVIDIA*" src/mesos-tests
> 
> 
> Thanks,
> 
> Kevin Klues
> 
>


Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message