Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
gjulianm committed Oct 18, 2024
1 parent b45ad3e commit 5cfca92
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 4 deletions.
14 changes: 14 additions & 0 deletions .mockery.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,17 @@ packages:
replace-type:
# https://github.com/vektra/mockery/issues/331
- github.com/DataDog/datadog-agent/pkg/serializer/types.stubMessageBody=github.com/DataDog/datadog-agent/pkg/serializer/types.ProcessMessageBody

# gpu
github.com/DataDog/datadog-agent/pkg/collector/corechecks/gpu/nvmlmetrics:
interfaces:
subsystemCollector:
config:
with-expecter: true
mock-build-tags: "linux && test"
# subsystemCollector is an internal interface, so we cannot use a mock in a separate package
inpackage: true
dir: "{{.InterfaceDir}}"
filename: "mocks.go"
outpkg: nvmlmetrics
mockname: mockSubsystemCollector
13 changes: 9 additions & 4 deletions pkg/collector/corechecks/gpu/nvmlmetrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ var allSubsystems = map[string]subsystemFactory{}

// NewCollector creates a new Collector that will collect metrics from the given NVML library.
// It delegates to newCollectorWithSubsystems, passing the package-level allSubsystems
// factories, and returns whatever collector and error that call produces.
func NewCollector(lib nvml.Interface) (*Collector, error) {
return newCollectorWithSubsystems(lib, allSubsystems)
}

// newCollectorWithSubsystems allows specifying which subsystems to use when creating the collector, useful for tests.
func newCollectorWithSubsystems(lib nvml.Interface, subsystems map[string]subsystemFactory) (*Collector, error) {
ret := nvml.SUCCESS
coll := &Collector{
lib: lib,
Expand All @@ -74,7 +79,7 @@ func NewCollector(lib nvml.Interface) (*Collector, error) {
coll.devices = append(coll.devices, dev)
}

for name, factory := range allSubsystems {
for name, factory := range subsystems {
subsystem, err := factory(lib, coll.devices)
if err != nil {
coll.Close() // Close all previously created subsystems
Expand All @@ -95,9 +100,9 @@ func (coll *Collector) Collect() ([]Metric, error) {
var err error

for _, dev := range coll.devices {
tags, err := getTagsFromDevice(dev)
if err != nil {
return allMetrics, fmt.Errorf("failed to get tags for device: %w", err)
tags, tagsErr := getTagsFromDevice(dev)
if tagsErr != nil {
return allMetrics, fmt.Errorf("failed to get tags for device: %w", tagsErr)
}

for _, subsystem := range coll.collectors {
Expand Down
86 changes: 86 additions & 0 deletions pkg/collector/corechecks/gpu/nvmlmetrics/collector_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build linux

package nvmlmetrics

import (
"errors"
"testing"

"github.com/NVIDIA/go-nvml/pkg/nvml"
nvmlmock "github.com/NVIDIA/go-nvml/pkg/nvml/mock"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)

// getBasicNvmlMock builds an nvml.Interface mock that reports exactly one device.
// Every handle lookup yields a freshly built device mock whose UUID is "GPU-123" and
// whose name is "Tesla UltraMegaPower", which is enough for tests that only need the
// basic interaction with NVML to succeed.
func getBasicNvmlMock() *nvmlmock.Interface {
	// A new device mock is created on each lookup rather than shared between calls.
	newDevice := func() *nvmlmock.Device {
		dev := &nvmlmock.Device{}
		dev.GetUUIDFunc = func() (string, nvml.Return) {
			return "GPU-123", nvml.SUCCESS
		}
		dev.GetNameFunc = func() (string, nvml.Return) {
			return "Tesla UltraMegaPower", nvml.SUCCESS
		}
		return dev
	}

	lib := &nvmlmock.Interface{}
	lib.DeviceGetCountFunc = func() (int, nvml.Return) {
		return 1, nvml.SUCCESS
	}
	lib.DeviceGetHandleByIndexFunc = func(int) (nvml.Device, nvml.Return) {
		return newDevice(), nvml.SUCCESS
	}
	return lib
}

// TestCollectorsGetClosedIfInitFails checks that when a subsystem factory fails while the
// collector is being created, no collector is returned, an error is reported, and the
// subsystem that had already been created gets closed.
func TestCollectorsGetClosedIfInitFails(t *testing.T) {
	succeedCollector := &mockSubsystemCollector{}
	factorySucceeded := false

	// On the first call, this function returns correctly. On the second it fails.
	// We need this as we cannot rely on the order of the subsystems in the map.
	factory := func(_ nvml.Interface, _ []nvml.Device) (subsystemCollector, error) {
		if !factorySucceeded {
			factorySucceeded = true
			return succeedCollector, nil
		}
		return nil, errors.New("failure")
	}

	succeedCollector.EXPECT().close().Return(nil)

	collector, err := newCollectorWithSubsystems(getBasicNvmlMock(), map[string]subsystemFactory{"ok": factory, "fail": factory})
	require.Nil(t, collector)
	require.Error(t, err)

	// The mock is created as a bare struct literal, so its expectations are only checked
	// when asked explicitly. Without this, the test would pass even if close() was never
	// called on the subsystem that was successfully created.
	succeedCollector.AssertExpectations(t)
}

// TestCollectorsCollectMetricsEvenInCaseOfFailure checks that Collect returns the metrics
// produced by a working subsystem, along with an error, when another subsystem fails to
// collect its metrics.
func TestCollectorsCollectMetricsEvenInCaseOfFailure(t *testing.T) {
	dummy := &mockSubsystemCollector{}
	factory := func(_ nvml.Interface, _ []nvml.Device) (subsystemCollector, error) {
		return dummy, nil
	}

	collector, err := newCollectorWithSubsystems(getBasicNvmlMock(), map[string]subsystemFactory{"one": factory, "two": factory})
	// Check the error before the returned value so that a creation failure is reported
	// with its cause instead of a bare "expected value not to be nil".
	require.NoError(t, err)
	require.NotNil(t, collector)

	// change the collectors so that they're executed in the order we want
	succeedCollector := &mockSubsystemCollector{}
	failCollector := &mockSubsystemCollector{}
	collector.collectors = []subsystemCollector{succeedCollector, failCollector}

	succeedCollector.EXPECT().collectMetrics(mock.Anything).Return([]Metric{{Name: "succeed"}}, nil)
	succeedCollector.EXPECT().name().Return("succeed").Maybe()
	failCollector.EXPECT().collectMetrics(mock.Anything).Return(nil, errors.New("failure"))
	failCollector.EXPECT().name().Return("fail").Maybe()

	metrics, err := collector.Collect()
	require.Error(t, err)
	require.Len(t, metrics, 1)
	require.Equal(t, "succeed", metrics[0].Name)
	require.NotEmpty(t, metrics[0].Tags)

	// The mocks are bare struct literals, so expectations must be asserted explicitly to
	// confirm that both subsystems were actually asked to collect metrics. The name()
	// expectations are marked Maybe() and therefore do not need to have been called.
	succeedCollector.AssertExpectations(t)
	failCollector.AssertExpectations(t)
}
185 changes: 185 additions & 0 deletions pkg/collector/corechecks/gpu/nvmlmetrics/mocks.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5cfca92

Please sign in to comment.