From 7af907a0c9b7d7a2989a897f7a25e770720ee31c Mon Sep 17 00:00:00 2001 From: Karthik Vetrivel Date: Fri, 6 Mar 2026 11:47:39 -0500 Subject: [PATCH] Add gpuop-cfg list-images command for csv and clusterpolicy Signed-off-by: Karthik Vetrivel --- .../internal/images/clusterpolicy.go | 60 ++++ .../internal/images/clusterpolicy_test.go | 151 ++++++++++ cmd/gpuop-cfg/internal/images/csv.go | 44 +++ cmd/gpuop-cfg/internal/images/csv_test.go | 104 +++++++ cmd/gpuop-cfg/list-images/list-images.go | 134 +++++++++ cmd/gpuop-cfg/list-images/list-images_test.go | 261 ++++++++++++++++++ cmd/gpuop-cfg/main.go | 2 + .../validate/clusterpolicy/images.go | 133 +-------- cmd/gpuop-cfg/validate/csv/images.go | 32 +-- 9 files changed, 775 insertions(+), 146 deletions(-) create mode 100644 cmd/gpuop-cfg/internal/images/clusterpolicy.go create mode 100644 cmd/gpuop-cfg/internal/images/clusterpolicy_test.go create mode 100644 cmd/gpuop-cfg/internal/images/csv.go create mode 100644 cmd/gpuop-cfg/internal/images/csv_test.go create mode 100644 cmd/gpuop-cfg/list-images/list-images.go create mode 100644 cmd/gpuop-cfg/list-images/list-images_test.go diff --git a/cmd/gpuop-cfg/internal/images/clusterpolicy.go b/cmd/gpuop-cfg/internal/images/clusterpolicy.go new file mode 100644 index 000000000..b68325655 --- /dev/null +++ b/cmd/gpuop-cfg/internal/images/clusterpolicy.go @@ -0,0 +1,60 @@ +/** +# Copyright (c), NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package images + +import ( + "fmt" + + v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" +) + +type OperandImage struct { + Name string + Image string +} + +func FromClusterPolicy(spec *v1.ClusterPolicySpec) ([]OperandImage, error) { + type operand struct { + name string + spec interface{} + } + + operands := []operand{ + {"Driver", &spec.Driver}, + {"Toolkit", &spec.Toolkit}, + {"DevicePlugin", &spec.DevicePlugin}, + {"DCGMExporter", &spec.DCGMExporter}, + {"DCGM", &spec.DCGM}, + {"GPUFeatureDiscovery", &spec.GPUFeatureDiscovery}, + {"MIGManager", &spec.MIGManager}, + {"GPUDirectStorage", spec.GPUDirectStorage}, + {"VFIOManager", &spec.VFIOManager}, + {"SandboxDevicePlugin", &spec.SandboxDevicePlugin}, + {"VGPUDeviceManager", &spec.VGPUDeviceManager}, + } + + var images []OperandImage + for _, op := range operands { + path, err := v1.ImagePath(op.spec) + if err != nil { + return nil, fmt.Errorf("failed to construct image path for %s: %v", op.name, err) + } + images = append(images, OperandImage{Name: op.name, Image: path}) + } + + return images, nil +} diff --git a/cmd/gpuop-cfg/internal/images/clusterpolicy_test.go b/cmd/gpuop-cfg/internal/images/clusterpolicy_test.go new file mode 100644 index 000000000..d7746d1ba --- /dev/null +++ b/cmd/gpuop-cfg/internal/images/clusterpolicy_test.go @@ -0,0 +1,151 @@ +/** +# Copyright (c), NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package images + +import ( + "testing" + + v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" +) + +func newClusterPolicySpec() *v1.ClusterPolicySpec { + return &v1.ClusterPolicySpec{ + Driver: v1.DriverSpec{ + Repository: "nvcr.io/nvidia", + Image: "driver", + Version: "550.127.05", + }, + Toolkit: v1.ToolkitSpec{ + Repository: "nvcr.io/nvidia/k8s", + Image: "container-toolkit", + Version: "v1.16.1", + }, + DevicePlugin: v1.DevicePluginSpec{ + Repository: "nvcr.io/nvidia", + Image: "k8s-device-plugin", + Version: "v0.16.1", + }, + DCGMExporter: v1.DCGMExporterSpec{ + Repository: "nvcr.io/nvidia/k8s", + Image: "dcgm-exporter", + Version: "3.3.6", + }, + DCGM: v1.DCGMSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "dcgm", + Version: "3.3.6", + }, + GPUFeatureDiscovery: v1.GPUFeatureDiscoverySpec{ + Repository: "nvcr.io/nvidia", + Image: "gpu-feature-discovery", + Version: "v0.16.1", + }, + MIGManager: v1.MIGManagerSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "k8s-mig-manager", + Version: "v0.8.0", + }, + GPUDirectStorage: &v1.GPUDirectStorageSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "nvidia-fs", + Version: "2.20.5", + }, + VFIOManager: v1.VFIOManagerSpec{ + Repository: "nvcr.io/nvidia", + Image: "vfio-manager", + Version: "v0.4.0", + }, + SandboxDevicePlugin: v1.SandboxDevicePluginSpec{ + Repository: "nvcr.io/nvidia", + Image: "kubevirt-gpu-device-plugin", + Version: "v1.2.7", + }, + VGPUDeviceManager: v1.VGPUDeviceManagerSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "vgpu-device-manager", + Version: "v0.2.7", + }, + } +} + +func Test_FromClusterPolicy(t *testing.T) { + tests := []struct { + name string + spec *v1.ClusterPolicySpec + wantImages []OperandImage + }{ + { + name: "constructs image paths from repository, image, and version", + spec: newClusterPolicySpec(), + wantImages: []OperandImage{ + {Name: "Driver", Image: "nvcr.io/nvidia/driver:550.127.05"}, + {Name: "Toolkit", Image: "nvcr.io/nvidia/k8s/container-toolkit:v1.16.1"}, + {Name: "DevicePlugin", Image: "nvcr.io/nvidia/k8s-device-plugin:v0.16.1"}, + {Name: "DCGMExporter", Image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.6"}, + {Name: "DCGM", Image: "nvcr.io/nvidia/cloud-native/dcgm:3.3.6"}, + {Name: "GPUFeatureDiscovery", Image: "nvcr.io/nvidia/gpu-feature-discovery:v0.16.1"}, + {Name: "MIGManager", Image: "nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.8.0"}, + {Name: "GPUDirectStorage", Image: "nvcr.io/nvidia/cloud-native/nvidia-fs:2.20.5"}, + {Name: "VFIOManager", Image: "nvcr.io/nvidia/vfio-manager:v0.4.0"}, + {Name: "SandboxDevicePlugin", Image: "nvcr.io/nvidia/kubevirt-gpu-device-plugin:v1.2.7"}, + {Name: "VGPUDeviceManager", Image: "nvcr.io/nvidia/cloud-native/vgpu-device-manager:v0.2.7"}, + }, + }, + { + name: "uses image as full path when repository and version are empty", + spec: func() *v1.ClusterPolicySpec { + s := newClusterPolicySpec() + s.Driver = v1.DriverSpec{ + Image: "nvcr.io/nvidia/driver:550.127.05", + } + return s + }(), + wantImages: []OperandImage{ + {Name: "Driver", Image: "nvcr.io/nvidia/driver:550.127.05"}, + {Name: "Toolkit", Image: "nvcr.io/nvidia/k8s/container-toolkit:v1.16.1"}, + {Name: "DevicePlugin", Image: "nvcr.io/nvidia/k8s-device-plugin:v0.16.1"}, + {Name: "DCGMExporter", Image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.6"}, + {Name: "DCGM", Image: "nvcr.io/nvidia/cloud-native/dcgm:3.3.6"}, + {Name: "GPUFeatureDiscovery", Image: "nvcr.io/nvidia/gpu-feature-discovery:v0.16.1"}, + {Name: "MIGManager", Image: "nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.8.0"}, + {Name: "GPUDirectStorage", Image: "nvcr.io/nvidia/cloud-native/nvidia-fs:2.20.5"}, + {Name: "VFIOManager", Image: "nvcr.io/nvidia/vfio-manager:v0.4.0"}, + {Name: "SandboxDevicePlugin", Image: "nvcr.io/nvidia/kubevirt-gpu-device-plugin:v1.2.7"}, + {Name: "VGPUDeviceManager", Image: "nvcr.io/nvidia/cloud-native/vgpu-device-manager:v0.2.7"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := FromClusterPolicy(tt.spec) + if err != nil { + t.Fatalf("FromClusterPolicy() unexpected error: %v", err) + } + if len(got) != len(tt.wantImages) { + t.Fatalf("FromClusterPolicy() returned %d images, want %d", len(got), len(tt.wantImages)) + } + for i, op := range got { + if op.Name != tt.wantImages[i].Name { + t.Errorf("FromClusterPolicy()[%d].Name = %q, want %q", i, op.Name, tt.wantImages[i].Name) + } + if op.Image != tt.wantImages[i].Image { + t.Errorf("FromClusterPolicy()[%d].Image = %q, want %q", i, op.Image, tt.wantImages[i].Image) + } + } + }) + } +} diff --git a/cmd/gpuop-cfg/internal/images/csv.go b/cmd/gpuop-cfg/internal/images/csv.go new file mode 100644 index 000000000..76deec93c --- /dev/null +++ b/cmd/gpuop-cfg/internal/images/csv.go @@ -0,0 +1,44 @@ +/** +# Copyright (c), NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package images + +import ( + "strings" + + "github.com/operator-framework/api/pkg/operators/v1alpha1" +) + +func FromCSV(csv *v1alpha1.ClusterServiceVersion) []string { + var images []string + + for _, image := range csv.Spec.RelatedImages { + images = append(images, image.Image) + } + + deployment := csv.Spec.InstallStrategy.StrategySpec.DeploymentSpecs[0] + ctr := deployment.Spec.Template.Spec.Containers[0] + images = append(images, ctr.Image) + + for _, env := range ctr.Env { + if !strings.HasSuffix(env.Name, "_IMAGE") { + continue + } + images = append(images, env.Value) + } + + return images +} diff --git a/cmd/gpuop-cfg/internal/images/csv_test.go b/cmd/gpuop-cfg/internal/images/csv_test.go new file mode 100644 index 000000000..21e151d9f --- /dev/null +++ b/cmd/gpuop-cfg/internal/images/csv_test.go @@ -0,0 +1,104 @@ +/** +# Copyright (c), NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package images + +import ( + "testing" + + "github.com/operator-framework/api/pkg/operators/v1alpha1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" +) + +func newCSV(relatedImages []v1alpha1.RelatedImage, containerImage string, envVars []corev1.EnvVar) *v1alpha1.ClusterServiceVersion { + return &v1alpha1.ClusterServiceVersion{ + Spec: v1alpha1.ClusterServiceVersionSpec{ + RelatedImages: relatedImages, + InstallStrategy: v1alpha1.NamedInstallStrategy{ + StrategySpec: v1alpha1.StrategyDetailsDeployment{ + DeploymentSpecs: []v1alpha1.StrategyDeploymentSpec{ + { + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Image: containerImage, + Env: envVars, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func Test_FromCSV(t *testing.T) { + tests := []struct { + name string + csv *v1alpha1.ClusterServiceVersion + want []string + }{ + { + name: "collects related images, container image, and IMAGE env vars", + csv: newCSV( + []v1alpha1.RelatedImage{ + {Image: "nvcr.io/nvidia/gpu-operator:v24.9.0"}, + {Image: "nvcr.io/nvidia/driver:550.127.05"}, + }, + "nvcr.io/nvidia/gpu-operator:v24.9.0", + []corev1.EnvVar{ + {Name: "DRIVER_IMAGE", Value: "nvcr.io/nvidia/driver:550"}, + {Name: "TOOLKIT_IMAGE", Value: "nvcr.io/nvidia/toolkit:1.16"}, + {Name: "LOG_LEVEL", Value: "debug"}, + }, + ), + want: []string{ + "nvcr.io/nvidia/gpu-operator:v24.9.0", + "nvcr.io/nvidia/driver:550.127.05", + "nvcr.io/nvidia/gpu-operator:v24.9.0", + "nvcr.io/nvidia/driver:550", + "nvcr.io/nvidia/toolkit:1.16", + }, + }, + { + name: "no related images and no env vars", + csv: newCSV(nil, "nvcr.io/nvidia/gpu-operator:v24.9.0", nil), + want: []string{ + "nvcr.io/nvidia/gpu-operator:v24.9.0", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := FromCSV(tt.csv) + if len(got) != len(tt.want) { + t.Fatalf("FromCSV() returned %d images, want %d", len(got), len(tt.want)) + } + for i, image := range got { + if image != tt.want[i] { + t.Errorf("FromCSV()[%d] = %q, want %q", i, image, tt.want[i]) + } + } + }) + } +} diff --git a/cmd/gpuop-cfg/list-images/list-images.go b/cmd/gpuop-cfg/list-images/list-images.go new file mode 100644 index 000000000..bccb444e3 --- /dev/null +++ b/cmd/gpuop-cfg/list-images/list-images.go @@ -0,0 +1,134 @@ +/** +# Copyright (c), NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package listimages + +import ( + "context" + "fmt" + "io" + "os" + + "github.com/operator-framework/api/pkg/operators/v1alpha1" + "github.com/sirupsen/logrus" + cli "github.com/urfave/cli/v3" + "sigs.k8s.io/yaml" + + v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + "github.com/NVIDIA/gpu-operator/cmd/gpuop-cfg/internal/images" +) + +type options struct { + input string +} + +func NewCommand(_ *logrus.Logger) *cli.Command { + listImages := cli.Command{ + Name: "list-images", + Usage: "List container images referenced in GPU Operator configuration files", + } + + listImages.Commands = []*cli.Command{ + buildCSV(), + buildClusterPolicy(), + } + + return &listImages +} + +func buildCSV() *cli.Command { + opts := options{} + + c := cli.Command{ + Name: "csv", + Usage: "List images from a ClusterServiceVersion manifest", + Action: func(ctx context.Context, cmd *cli.Command) error { + contents, err := getContents(opts.input) + if err != nil { + return fmt.Errorf("failed to read file: %v", err) + } + + spec := &v1alpha1.ClusterServiceVersion{} + if err := yaml.Unmarshal(contents, spec); err != nil { + return fmt.Errorf("failed to unmarshal csv: %v", err) + } + + for _, image := range images.FromCSV(spec) { + fmt.Println(image) + } + return nil + }, + } + + c.Flags = []cli.Flag{ + &cli.StringFlag{ + Name: "input", + Usage: "Specify the input file. If this is '-' the file is read from STDIN", + Value: "-", + Destination: &opts.input, + }, + } + + return &c +} + +func buildClusterPolicy() *cli.Command { + opts := options{} + + c := cli.Command{ + Name: "clusterpolicy", + Usage: "List images from a ClusterPolicy manifest", + Action: func(ctx context.Context, cmd *cli.Command) error { + contents, err := getContents(opts.input) + if err != nil { + return fmt.Errorf("failed to read file: %v", err) + } + + spec := &v1.ClusterPolicy{} + if err := yaml.Unmarshal(contents, spec); err != nil { + return fmt.Errorf("failed to unmarshal clusterpolicy: %v", err) + } + + operandImages, err := images.FromClusterPolicy(&spec.Spec) + if err != nil { + return err + } + + for _, op := range operandImages { + fmt.Println(op.Image) + } + return nil + }, + } + + c.Flags = []cli.Flag{ + &cli.StringFlag{ + Name: "input", + Usage: "Specify the input file. If this is '-' the file is read from STDIN", + Value: "-", + Destination: &opts.input, + }, + } + + return &c +} + +func getContents(input string) ([]byte, error) { + if input == "-" { + return io.ReadAll(os.Stdin) + } + return os.ReadFile(input) +} diff --git a/cmd/gpuop-cfg/list-images/list-images_test.go b/cmd/gpuop-cfg/list-images/list-images_test.go new file mode 100644 index 000000000..c9de8603d --- /dev/null +++ b/cmd/gpuop-cfg/list-images/list-images_test.go @@ -0,0 +1,261 @@ +/** +# Copyright (c), NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package listimages + +import ( + "context" + "os" + "testing" + + "github.com/sirupsen/logrus" +) + +func Test_getContents(t *testing.T) { + tests := []struct { + name string + fileData string + createFile bool + wantErr bool + }{ + { + name: "reads from file", + fileData: "apiVersion: v1\nkind: ClusterPolicy\n", + createFile: true, + wantErr: false, + }, + { + name: "file does not exist", + createFile: false, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + testFile := tmpDir + "/input.yaml" + + if tt.createFile { + err := os.WriteFile(testFile, []byte(tt.fileData), 0600) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + } + + got, err := getContents(testFile) + if tt.wantErr { + if err == nil { + t.Errorf("getContents() expected error but got none") + } + return + } + if err != nil { + t.Fatalf("getContents() unexpected error: %v", err) + } + if string(got) != tt.fileData { + t.Errorf("getContents() = %q, want %q", string(got), tt.fileData) + } + }) + } +} + +func Test_buildCSV(t *testing.T) { + tests := []struct { + name string + fileData string + createFile bool + wantErr bool + }{ + { + name: "valid CSV with related images and container env vars", + fileData: `apiVersion: operators.coreos.com/v1alpha1 +kind: ClusterServiceVersion +spec: + relatedImages: + - image: nvcr.io/nvidia/gpu-operator:v24.9.0 + install: + strategy: deployment + spec: + deployments: + - spec: + template: + spec: + containers: + - image: nvcr.io/nvidia/gpu-operator:v24.9.0 + env: + - name: DRIVER_IMAGE + value: nvcr.io/nvidia/driver:550 +`, + createFile: true, + wantErr: false, + }, + { + name: "invalid YAML", + fileData: `{{{ not yaml`, + createFile: true, + wantErr: true, + }, + { + name: "file does not exist", + createFile: false, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + testFile := tmpDir + "/csv.yaml" + + if tt.createFile { + err := os.WriteFile(testFile, []byte(tt.fileData), 0600) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + } + + cmd := buildCSV() + err := cmd.Run(context.Background(), []string{"csv", "--input", testFile}) + + if tt.wantErr { + if err == nil { + t.Errorf("buildCSV() expected error but got none") + } + } else { + if err != nil { + t.Errorf("buildCSV() unexpected error: %v", err) + } + } + }) + } +} + +func Test_buildClusterPolicy(t *testing.T) { + tests := []struct { + name string + fileData string + createFile bool + wantErr bool + }{ + { + name: "valid ClusterPolicy", + fileData: `apiVersion: nvidia.com/v1 +kind: ClusterPolicy +spec: + driver: + repository: nvcr.io/nvidia + image: driver + version: "550.127.05" + toolkit: + repository: nvcr.io/nvidia/k8s + image: container-toolkit + version: v1.16.1 + devicePlugin: + repository: nvcr.io/nvidia + image: k8s-device-plugin + version: v0.16.1 + dcgmExporter: + repository: nvcr.io/nvidia/k8s + image: dcgm-exporter + version: "3.3.6" + dcgm: + repository: nvcr.io/nvidia/cloud-native + image: dcgm + version: "3.3.6" + gfd: + repository: nvcr.io/nvidia + image: gpu-feature-discovery + version: v0.16.1 + migManager: + repository: nvcr.io/nvidia/cloud-native + image: k8s-mig-manager + version: v0.8.0 + gds: + repository: nvcr.io/nvidia/cloud-native + image: nvidia-fs + version: "2.20.5" + vfioManager: + repository: nvcr.io/nvidia + image: vfio-manager + version: v0.4.0 + sandboxDevicePlugin: + repository: nvcr.io/nvidia + image: kubevirt-gpu-device-plugin + version: v1.2.7 + vgpuDeviceManager: + repository: nvcr.io/nvidia/cloud-native + image: vgpu-device-manager + version: v0.2.7 +`, + createFile: true, + wantErr: false, + }, + { + name: "invalid YAML", + fileData: `{{{ not yaml`, + createFile: true, + wantErr: true, + }, + { + name: "file does not exist", + createFile: false, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + testFile := tmpDir + "/clusterpolicy.yaml" + + if tt.createFile { + err := os.WriteFile(testFile, []byte(tt.fileData), 0600) + if err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + } + + cmd := buildClusterPolicy() + err := cmd.Run(context.Background(), []string{"clusterpolicy", "--input", testFile}) + + if tt.wantErr { + if err == nil { + t.Errorf("buildClusterPolicy() expected error but got none") + } + } else { + if err != nil { + t.Errorf("buildClusterPolicy() unexpected error: %v", err) + } + } + }) + } +} + +func Test_NewCommand(t *testing.T) { + logger := logrus.New() + cmd := NewCommand(logger) + + if cmd.Name != "list-images" { + t.Errorf("NewCommand().Name = %q, want %q", cmd.Name, "list-images") + } + if len(cmd.Commands) != 2 { + t.Fatalf("NewCommand() has %d subcommands, want 2", len(cmd.Commands)) + } + if cmd.Commands[0].Name != "csv" { + t.Errorf("NewCommand().Commands[0].Name = %q, want %q", cmd.Commands[0].Name, "csv") + } + if cmd.Commands[1].Name != "clusterpolicy" { + t.Errorf("NewCommand().Commands[1].Name = %q, want %q", cmd.Commands[1].Name, "clusterpolicy") + } +} diff --git a/cmd/gpuop-cfg/main.go b/cmd/gpuop-cfg/main.go index 8b69c5000..2c989d148 100644 --- a/cmd/gpuop-cfg/main.go +++ b/cmd/gpuop-cfg/main.go @@ -23,6 +23,7 @@ import ( log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v3" + listimages "github.com/NVIDIA/gpu-operator/cmd/gpuop-cfg/list-images" "github.com/NVIDIA/gpu-operator/cmd/gpuop-cfg/validate" ) @@ -66,6 +67,7 @@ func main() { // Define the subcommands c.Commands = []*cli.Command{ validate.NewCommand(logger), + listimages.NewCommand(logger), } err := c.Run(context.Background(), os.Args) diff --git a/cmd/gpuop-cfg/validate/clusterpolicy/images.go b/cmd/gpuop-cfg/validate/clusterpolicy/images.go index f91c2461d..69520d7bd 100644 --- a/cmd/gpuop-cfg/validate/clusterpolicy/images.go +++ b/cmd/gpuop-cfg/validate/clusterpolicy/images.go @@ -24,132 +24,25 @@ import ( "github.com/regclient/regclient/types/ref" v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + "github.com/NVIDIA/gpu-operator/cmd/gpuop-cfg/internal/images" ) func validateImages(ctx context.Context, spec *v1.ClusterPolicySpec) error { - // Driver - path, err := v1.ImagePath(&spec.Driver) + operandImages, err := images.FromClusterPolicy(spec) if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) + return err } - // For driver, we must append the os-tag - path += "-ubuntu22.04" - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // Toolkit - path, err = v1.ImagePath(&spec.Toolkit) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // Device Plugin - path, err = v1.ImagePath(&spec.DevicePlugin) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // DCGMExporter - path, err = v1.ImagePath(&spec.DCGMExporter) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // DCGM - path, err = v1.ImagePath(&spec.DCGM) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // GPUFeatureDiscovery - path, err = v1.ImagePath(&spec.GPUFeatureDiscovery) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // MIGManager - path, err = v1.ImagePath(&spec.MIGManager) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // GPUDirectStorage - path, err = v1.ImagePath(spec.GPUDirectStorage) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - // For GDS driver, we must append the os-tag - path += "-ubuntu22.04" - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // VFIOManager - path, err = v1.ImagePath(&spec.VFIOManager) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // SandboxDevicePlugin - path, err = v1.ImagePath(&spec.SandboxDevicePlugin) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) - } - - // VGPUDeviceManager - path, err = v1.ImagePath(&spec.VGPUDeviceManager) - if err != nil { - return fmt.Errorf("failed to construct the image path: %v", err) - } - - err = validateImage(ctx, path) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", path, err) + for _, op := range operandImages { + path := op.Image + // For Driver and GPUDirectStorage, we must append the os-tag + if op.Name == "Driver" || op.Name == "GPUDirectStorage" { + path += "-ubuntu22.04" + } + err = validateImage(ctx, path) + if err != nil { + return fmt.Errorf("failed to validate image %s: %v", path, err) + } } return nil diff --git a/cmd/gpuop-cfg/validate/csv/images.go b/cmd/gpuop-cfg/validate/csv/images.go index 3d8e4f532..833e8c27f 100644 --- a/cmd/gpuop-cfg/validate/csv/images.go +++ b/cmd/gpuop-cfg/validate/csv/images.go @@ -19,41 +19,21 @@ package csv import ( "context" "fmt" - "strings" "github.com/operator-framework/api/pkg/operators/v1alpha1" "github.com/regclient/regclient" "github.com/regclient/regclient/types/ref" + + "github.com/NVIDIA/gpu-operator/cmd/gpuop-cfg/internal/images" ) func validateImages(ctx context.Context, csv *v1alpha1.ClusterServiceVersion) error { - // validate all 'relatedImages' - images := csv.Spec.RelatedImages - for _, image := range images { - err := validateImage(ctx, image.Image) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", image.Name, err) - } - } - - // get the gpu-operator deployment spec - deployment := csv.Spec.InstallStrategy.StrategySpec.DeploymentSpecs[0] - ctr := deployment.Spec.Template.Spec.Containers[0] + imagePaths := images.FromCSV(csv) - // validate the gpu-operator image - err := validateImage(ctx, ctr.Image) - if err != nil { - return fmt.Errorf("failed to validate image %s: %v", ctr.Image, err) - } - - // validate all operand images configured as env vars - for _, env := range ctr.Env { - if !strings.HasSuffix(env.Name, "_IMAGE") { - continue - } - err = validateImage(ctx, env.Value) + for _, path := range imagePaths { + err := validateImage(ctx, path) if err != nil { - return fmt.Errorf("failed to validate image %s: %v", env.Name, err) + return fmt.Errorf("failed to validate image %s: %v", path, err) } }