Skip to content

Commit

Permalink
Add missing calls to nvmlShutdown (#5311)
Browse files Browse the repository at this point in the history
- in some places, DALI initializes NVML but never calls
  nvmlShutdown to shut it down gracefully

Signed-off-by: Janusz Lisiecki <[email protected]>
  • Loading branch information
JanuszL authored Feb 9, 2024
1 parent a48c723 commit 1b60777
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 13 deletions.
4 changes: 3 additions & 1 deletion dali/core/mm/malloc_resource.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ cuda_malloc_async_memory_resource::cuda_malloc_async_memory_resource(int device_
#if NVML_ENABLED
static const float driverVersion = []() {
nvml::Init();
return nvml::GetDriverVersion();
auto ret = nvml::GetDriverVersion();
nvml::Shutdown();
return ret;
}();
if (driverVersion < 470.60) {
cudaMemPool_t memPool;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,10 @@ TYPED_TEST(nvjpegDecodeDecoupledAPITest, TestSingleTiffDecode4T) {
this->TiffTestDecode(4);
}

#if NVJPEG_VER_MAJOR >= 11
#if NVJPEG_VER_MAJOR >= 11 && NVML_ENABLED
void PrintDeviceInfo() {
unsigned int device_count;
if (!nvmlIsInitialized()) {
nvml::Init();
return;
}
nvml::Init();
CUDA_CALL(nvmlDeviceGetCount_v2(&device_count));
for (unsigned int device_idx = 0; device_idx < device_count; device_idx++) {
auto info = nvml::GetDeviceInfo(device_idx);
Expand All @@ -198,6 +195,7 @@ void PrintDeviceInfo() {
<< " cc_m " << info.cap_minor
<< std::endl;
}
nvml::Shutdown();
}

/**
Expand All @@ -206,7 +204,9 @@ void PrintDeviceInfo() {
bool ShouldUseHwDecoder() {
  // HW decoder is disabled for drivers < 455.x, see
  // dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h for details
  //
  // Both NVML queries are made inside a single Init()/Shutdown() bracket, so
  // the capability check never runs after this function has released NVML.
  // The result is computed once and cached, so NVML is not re-initialized and
  // shut down again on every call.
  static const bool use_hw_decoder = []() {
    nvml::Init();
    const float driver_version = nvml::GetDriverVersion();
    const bool device_supports_hw_decoder = nvml::isHWDecoderSupported();
    nvml::Shutdown();
    return device_supports_hw_decoder && driver_version >= 455;
  }();
  return use_hw_decoder;
}
Expand Down Expand Up @@ -510,7 +510,7 @@ class HwDecoderRandomCropUtilizationTest : public ::testing::Test {
TEST_F(HwDecoderRandomCropUtilizationTest, UtilizationTest) {
this->pipeline_.Run();
}
#endif
#endif // NVJPEG_VER_MAJOR >= 11 && NVML_ENABLED

class Nvjpeg2kTest : public ::testing::Test {
public:
Expand Down
4 changes: 3 additions & 1 deletion dali/operators/reader/gds_mem_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ void SkipIfIncompatible(TestBody &&body) {
#if NVML_ENABLED
static const int driverVersion = []() {
nvml::Init();
return nvml::GetCudaDriverVersion();
auto ret = nvml::GetCudaDriverVersion();
nvml::Shutdown();
return ret;
}();
#if defined(__aarch64__)
if (driverVersion < 12020) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void VideoLoaderDecoderGpu::InitCudaStream() {
{
nvml::Init();
static float driver_version = nvml::GetDriverVersion();
nvml::Shutdown();
if (driver_version > 460 && driver_version < 470.21) {
DALI_WARN_ONCE("Warning: Decoding on a default stream. Performance may be affected.");
return;
Expand Down
5 changes: 1 addition & 4 deletions dali/operators/reader/video_reader_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ TEST_F(VideoReaderTest, MultipleVideoResolution) {
#if NVML_ENABLED
nvml::Init();
driverVersion = nvml::GetDriverVersion();
nvml::Shutdown();
#endif


Expand Down Expand Up @@ -188,10 +189,6 @@ TEST_F(VideoReaderTest, MultipleVideoResolution) {
FAIL() << "Unexpected label";
}
}

#if NVML_ENABLED
nvml::Shutdown();
#endif
}

TEST_F(VideoReaderTest, PackedBFrames) {
Expand Down
6 changes: 5 additions & 1 deletion dali/operators/sequence/optical_flow/optical_flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@ class OpticalFlow : public StatelessOperator<Backend> {
#endif
}

~OpticalFlow();
// Calls nvml::Shutdown() when the operator is destroyed; compiled in only
// when NVML_ENABLED is set, otherwise the destructor body is empty.
// NOTE(review): presumably balances an nvml::Init() made in the constructor,
// which is not visible in this hunk — confirm the two stay paired.
~OpticalFlow() {
#if NVML_ENABLED
nvml::Shutdown();
#endif
}
DISABLE_COPY_MOVE_ASSIGN(OpticalFlow);

protected:
Expand Down

0 comments on commit 1b60777

Please sign in to comment.