From 838910d29b164f7192c960b81957a531f0996956 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 26 Sep 2024 14:37:05 +0200 Subject: [PATCH] Add creation of select driver symlinks to CDI spec This change aligns the creation of symlinks under CDI with the implementation in libnvidia-container. If the driver libraries are present, the following symlinks are created: * {{ .LibRoot }}/libcuda.so -> libcuda.so.1 * {{ .LibRoot }}/libnvidia-opticalflow.so -> libnvidia-opticalflow.so.1 * {{ .LibRoot }}/libGLX_indirect.so.0 -> libGLX_nvidia.so.{{ .Version }} Signed-off-by: Evan Lezar --- internal/discover/none.go | 6 +- internal/discover/symlinks.go | 108 +++++++ internal/discover/symlinks_test.go | 330 ++++++++++++++++++++ internal/platform-support/dgpu/nvml_test.go | 2 +- internal/platform-support/tegra/csv.go | 20 +- internal/platform-support/tegra/symlinks.go | 58 +--- pkg/nvcdi/driver-nvml.go | 10 +- 7 files changed, 464 insertions(+), 70 deletions(-) create mode 100644 internal/discover/symlinks.go create mode 100644 internal/discover/symlinks_test.go diff --git a/internal/discover/none.go b/internal/discover/none.go index 2a1d2c57..554e7eab 100644 --- a/internal/discover/none.go +++ b/internal/discover/none.go @@ -24,15 +24,15 @@ var _ Discover = (*None)(nil) // Devices returns an empty list of devices func (e None) Devices() ([]Device, error) { - return []Device{}, nil + return nil, nil } // Mounts returns an empty list of mounts func (e None) Mounts() ([]Mount, error) { - return []Mount{}, nil + return nil, nil } // Hooks returns an empty list of hooks func (e None) Hooks() ([]Hook, error) { - return []Hook{}, nil + return nil, nil } diff --git a/internal/discover/symlinks.go b/internal/discover/symlinks.go new file mode 100644 index 00000000..0119de0d --- /dev/null +++ b/internal/discover/symlinks.go @@ -0,0 +1,108 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package discover + +import ( + "fmt" + "path/filepath" +) + +type additionalSymlinks struct { + Discover + version string + nvidiaCDIHookPath string +} + +// WithDriverDotSoSymlinks decorates the provided discoverer. +// A hook is added that checks for specific driver symlinks that need to be created. +func WithDriverDotSoSymlinks(mounts Discover, version string, nvidiaCDIHookPath string) Discover { + if version == "" { + version = "*.*" + } + return &additionalSymlinks{ + Discover: mounts, + nvidiaCDIHookPath: nvidiaCDIHookPath, + version: version, + } +} + +// Hooks returns a hook to create the additional symlinks based on the mounts. +func (d *additionalSymlinks) Hooks() ([]Hook, error) { + mounts, err := d.Discover.Mounts() + if err != nil { + return nil, fmt.Errorf("failed to get library mounts: %v", err) + } + hooks, err := d.Discover.Hooks() + if err != nil { + return nil, fmt.Errorf("failed to get hooks: %v", err) + } + + var links []string + processedPaths := make(map[string]bool) + processedLinks := make(map[string]bool) + for _, mount := range mounts { + if processedPaths[mount.Path] { + continue + } + processedPaths[mount.Path] = true + + for _, link := range d.getLinksForMount(mount.Path) { + if processedLinks[link] { + continue + } + processedLinks[link] = true + links = append(links, link) + } + } + + if len(links) == 0 { + return hooks, nil + } + + hook := CreateCreateSymlinkHook(d.nvidiaCDIHookPath, links).(Hook) + return append(hooks, hook), nil +} + +// getLinksForMount maps the path to created links if any. +func (d additionalSymlinks) getLinksForMount(path string) []string { + dir, filename := filepath.Split(path) + switch { + case d.isDriverLibrary("libcuda.so", filename): + // XXX Many applications wrongly assume that libcuda.so exists (e.g. with dlopen). + // create libcuda.so -> libcuda.so.1 symlink + link := fmt.Sprintf("%s::%s", "libcuda.so.1", filepath.Join(dir, "libcuda.so")) + return []string{link} + case d.isDriverLibrary("libGLX_nvidia.so", filename): + // XXX GLVND requires this symlink for indirect GLX support. + // create libGLX_indirect.so.0 -> libGLX_nvidia.so.VERSION symlink + link := fmt.Sprintf("%s::%s", filename, filepath.Join(dir, "libGLX_indirect.so.0")) + return []string{link} + case d.isDriverLibrary("libnvidia-opticalflow.so", filename): + // XXX Fix missing symlink for libnvidia-opticalflow.so. + // create libnvidia-opticalflow.so -> libnvidia-opticalflow.so.1 symlink + link := fmt.Sprintf("%s::%s", "libnvidia-opticalflow.so.1", filepath.Join(dir, "libnvidia-opticalflow.so")) + return []string{link} + } + return nil +} + +// isDriverLibrary checks whether the specified filename is a specific driver library. +func (d additionalSymlinks) isDriverLibrary(libraryName string, filename string) bool { + pattern := libraryName + "." + d.version + match, _ := filepath.Match(pattern, filename) + return match +} diff --git a/internal/discover/symlinks_test.go b/internal/discover/symlinks_test.go new file mode 100644 index 00000000..7653b847 --- /dev/null +++ b/internal/discover/symlinks_test.go @@ -0,0 +1,330 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package discover + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestWithWithDriverDotSoSymlinks(t *testing.T) { + testCases := []struct { + description string + discover Discover + version string + expectedDevices []Device + expectedDevicesError error + expectedHooks []Hook + expectedHooksError error + expectedMounts []Mount + expectedMountsError error + }{ + { + description: "empty discoverer remains empty", + discover: None{}, + }, + { + description: "non-matching discoverer remains unchanged", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + devices := []Device{ + { + Path: "/dev/dev1", + }, + } + return devices, nil + }, + HooksFunc: func() ([]Hook, error) { + hooks := []Hook{ + { + Lifecycle: "prestart", + Path: "/path/to/a/hook", + Args: []string{"hook", "arg1", "arg2"}, + }, + } + return hooks, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libnotcuda.so.1.2.3", + }, + } + return mounts, nil + }, + }, + expectedDevices: []Device{ + { + Path: "/dev/dev1", + }, + }, + expectedHooks: []Hook{ + { + Lifecycle: "prestart", + Path: "/path/to/a/hook", + Args: []string{"hook", "arg1", "arg2"}, + }, + }, + expectedMounts: []Mount{ + { + Path: "/usr/lib/libnotcuda.so.1.2.3", + }, + }, + }, + { + description: "libcuda.so.RM_VERSION is matched", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + return nil, nil + }, + HooksFunc: func() ([]Hook, error) { + return nil, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + } + return mounts, nil + }, + }, + version: "1.2.3", + expectedMounts: []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + }, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: "/path/to/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + }, + }, + }, + { + description: "libcuda.so.RM_VERSION is matched by pattern", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + return nil, nil + }, + HooksFunc: func() ([]Hook, error) { + return nil, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + } + return mounts, nil + }, + }, + version: "", + expectedMounts: []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + }, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: "/path/to/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + }, + }, + }, + { + description: "beta libcuda.so.RM_VERSION is matched", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + return nil, nil + }, + HooksFunc: func() ([]Hook, error) { + return nil, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2", + }, + } + return mounts, nil + }, + }, + expectedMounts: []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2", + }, + }, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: "/path/to/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + }, + }, + }, + { + description: "non-matching libcuda.so.RM_VERSION is ignored", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + return nil, nil + }, + HooksFunc: func() ([]Hook, error) { + return nil, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + } + return mounts, nil + }, + }, + version: "4.5.6", + expectedMounts: []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + }, + }, + { + description: "hooks are extended", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + return nil, nil + }, + HooksFunc: func() ([]Hook, error) { + hooks := []Hook{ + { + Lifecycle: "prestart", + Path: "/path/to/a/hook", + Args: []string{"hook", "arg1", "arg2"}, + }, + } + return hooks, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + } + return mounts, nil + }, + }, + version: "1.2.3", + expectedMounts: []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + }, + expectedHooks: []Hook{ + { + Lifecycle: "prestart", + Path: "/path/to/a/hook", + Args: []string{"hook", "arg1", "arg2"}, + }, + { + Lifecycle: "createContainer", + Path: "/path/to/nvidia-cdi-hook", + Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + }, + }, + }, + { + description: "all driver so symlinks are matched", + discover: &DiscoverMock{ + DevicesFunc: func() ([]Device, error) { + return nil, nil + }, + HooksFunc: func() ([]Hook, error) { + return nil, nil + }, + MountsFunc: func() ([]Mount, error) { + mounts := []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + { + Path: "/usr/lib/libGLX_nvidia.so.1.2.3", + }, + { + Path: "/usr/lib/libnvidia-opticalflow.so.1.2.3", + }, + { + Path: "/usr/lib/libanother.so.1.2.3", + }, + } + return mounts, nil + }, + }, + expectedMounts: []Mount{ + { + Path: "/usr/lib/libcuda.so.1.2.3", + }, + { + Path: "/usr/lib/libGLX_nvidia.so.1.2.3", + }, + { + Path: "/usr/lib/libnvidia-opticalflow.so.1.2.3", + }, + { + Path: "/usr/lib/libanother.so.1.2.3", + }, + }, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: "/path/to/nvidia-cdi-hook", + Args: []string{ + "nvidia-cdi-hook", "create-symlinks", + "--link", "libcuda.so.1::/usr/lib/libcuda.so", + "--link", "libGLX_nvidia.so.1.2.3::/usr/lib/libGLX_indirect.so.0", + "--link", "libnvidia-opticalflow.so.1::/usr/lib/libnvidia-opticalflow.so", + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + d := WithDriverDotSoSymlinks( + tc.discover, + tc.version, + "/path/to/nvidia-cdi-hook", + ) + + devices, err := d.Devices() + require.ErrorIs(t, err, tc.expectedDevicesError) + require.EqualValues(t, tc.expectedDevices, devices) + + hooks, err := d.Hooks() + require.ErrorIs(t, err, tc.expectedHooksError) + require.EqualValues(t, tc.expectedHooks, hooks) + + mounts, err := d.Mounts() + require.ErrorIs(t, err, tc.expectedMountsError) + require.EqualValues(t, tc.expectedMounts, mounts) + }) + } +} diff --git a/internal/platform-support/dgpu/nvml_test.go b/internal/platform-support/dgpu/nvml_test.go index da4ac2a7..2ffbb9f3 100644 --- a/internal/platform-support/dgpu/nvml_test.go +++ b/internal/platform-support/dgpu/nvml_test.go @@ -139,7 +139,7 @@ func TestNewNvmlMIGDiscoverer(t *testing.T) { }, expectedDevices: nil, expectedMounts: nil, - expectedHooks: []discover.Hook{}, + expectedHooks: nil, }, } for _, tc := range testCases { diff --git a/internal/platform-support/tegra/csv.go b/internal/platform-support/tegra/csv.go index e47ae9de..a025c52d 100644 --- a/internal/platform-support/tegra/csv.go +++ b/internal/platform-support/tegra/csv.go @@ -49,14 +49,20 @@ func (o tegraOptions) newDiscovererFromCSVFiles() (discover.Discover, error) { targetsByType[csv.MountSpecDir], ) - // Libraries and symlinks use the same locator. - libraries := discover.NewMounts( - o.logger, - o.symlinkLocator, - o.driverRoot, - targetsByType[csv.MountSpecLib], + // We create a discoverer for mounted libraries and add additional .so + // symlinks for the driver. + libraries := discover.WithDriverDotSoSymlinks( + discover.NewMounts( + o.logger, + o.symlinkLocator, + o.driverRoot, + targetsByType[csv.MountSpecLib], + ), + "", + o.nvidiaCDIHookPath, ) + // We process the expliclitlty requested symlinks. symlinkTargets := o.ignorePatterns.Apply(targetsByType[csv.MountSpecSym]...) o.logger.Debugf("Filtered symlink targets: %v", symlinkTargets) symlinks := discover.NewMounts( @@ -65,7 +71,7 @@ func (o tegraOptions) newDiscovererFromCSVFiles() (discover.Discover, error) { o.driverRoot, symlinkTargets, ) - createSymlinks := o.createCSVSymlinkHooks(symlinkTargets, libraries) + createSymlinks := o.createCSVSymlinkHooks(symlinkTargets) d := discover.Merge( devices, diff --git a/internal/platform-support/tegra/symlinks.go b/internal/platform-support/tegra/symlinks.go index 37b07e6d..cc677638 100644 --- a/internal/platform-support/tegra/symlinks.go +++ b/internal/platform-support/tegra/symlinks.go @@ -18,8 +18,6 @@ package tegra import ( "fmt" - "path/filepath" - "strings" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" @@ -31,7 +29,6 @@ type symlinkHook struct { logger logger.Interface nvidiaCDIHookPath string targets []string - mountsFrom discover.Discover // The following can be overridden for testing symlinkChainLocator lookup.Locator @@ -39,12 +36,11 @@ type symlinkHook struct { } // createCSVSymlinkHooks creates a discoverer for a hook that creates required symlinks in the container -func (o tegraOptions) createCSVSymlinkHooks(targets []string, mounts discover.Discover) discover.Discover { +func (o tegraOptions) createCSVSymlinkHooks(targets []string) discover.Discover { return symlinkHook{ logger: o.logger, nvidiaCDIHookPath: o.nvidiaCDIHookPath, targets: targets, - mountsFrom: mounts, symlinkChainLocator: o.symlinkChainLocator, resolveSymlink: o.resolveSymlink, } @@ -52,62 +48,12 @@ func (o tegraOptions) createCSVSymlinkHooks(targets []string, mounts discover.Di // Hooks returns a hook to create the symlinks from the required CSV files func (d symlinkHook) Hooks() ([]discover.Hook, error) { - specificLinks, err := d.getSpecificLinks() - if err != nil { - return nil, fmt.Errorf("failed to determine specific links: %v", err) - } - - csvSymlinks := d.getCSVFileSymlinks() - return discover.CreateCreateSymlinkHook( d.nvidiaCDIHookPath, - append(csvSymlinks, specificLinks...), + d.getCSVFileSymlinks(), ).Hooks() } -// getSpecificLinks returns the required specic links that need to be created -func (d symlinkHook) getSpecificLinks() ([]string, error) { - mounts, err := d.mountsFrom.Mounts() - if err != nil { - return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) - } - - linkProcessed := make(map[string]bool) - var links []string - for _, m := range mounts { - var target string - var link string - - lib := filepath.Base(m.Path) - - switch { - case strings.HasPrefix(lib, "libcuda.so"): - // XXX Many applications wrongly assume that libcuda.so exists (e.g. with dlopen). - target = "libcuda.so.1" - link = "libcuda.so" - case strings.HasPrefix(lib, "libGLX_nvidia.so"): - // XXX GLVND requires this symlink for indirect GLX support. - target = lib - link = "libGLX_indirect.so.0" - case strings.HasPrefix(lib, "libnvidia-opticalflow.so"): - // XXX Fix missing symlink for libnvidia-opticalflow.so. - target = "libnvidia-opticalflow.so.1" - link = "libnvidia-opticalflow.so" - default: - continue - } - if linkProcessed[link] { - continue - } - linkProcessed[link] = true - - linkPath := filepath.Join(filepath.Dir(m.Path), link) - links = append(links, fmt.Sprintf("%v::%v", target, linkPath)) - } - - return links, nil -} - // getSymlinkCandidates returns a list of symlinks that are candidates for being created. func (d symlinkHook) getSymlinkCandidates() []string { var candidates []string diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 8fb39888..90b0decc 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -97,11 +97,15 @@ func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nv libraryPaths, ) - hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath) + updateLDCache, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath) d := discover.Merge( - libraries, - hooks, + discover.WithDriverDotSoSymlinks( + libraries, + version, + nvidiaCDIHookPath, + ), + updateLDCache, ) return d, nil