diff --git a/config/rbac/daemonsets_role.yaml b/config/rbac/daemonsets_role.yaml new file mode 100644 index 000000000..d0e3c3248 --- /dev/null +++ b/config/rbac/daemonsets_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: gpu-operator-daemonsets-role +rules: +- apiGroups: + - apps + resources: + - daemonsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/config/rbac/daemonsets_role_binding.yaml b/config/rbac/daemonsets_role_binding.yaml new file mode 100644 index 000000000..fe0cc3dcb --- /dev/null +++ b/config/rbac/daemonsets_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: gpu-operator-daemonsets-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: gpu-operator-daemonsets-role +subjects: +- kind: ServiceAccount + name: gpu-operator + namespace: system diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 27cb1d17f..b560ae100 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -2,6 +2,8 @@ resources: - service_account.yaml - role.yaml - role_binding.yaml +- daemonsets_role.yaml +- daemonsets_role_binding.yaml - leader_election_role.yaml - leader_election_role_binding.yaml # Comment the following 4 lines if you want to disable diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 7a631b9e3..b2b6fc3b7 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -10,8 +10,6 @@ rules: - configmaps - endpoints - events - - namespaces - - nodes - persistentvolumeclaims - pods - pods/eviction @@ -27,6 +25,23 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - patch +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - patch + - update + - watch - apiGroups: - apiextensions.k8s.io resources: @@ -39,6 +54,7 @@ rules: - apps resources: - 
controllerrevisions + - daemonsets verbs: - get - list @@ -46,7 +62,6 @@ rules: - apiGroups: - apps resources: - - daemonsets - deployments - replicasets - statefulsets @@ -163,7 +178,9 @@ rules: - rolebindings - roles verbs: - - '*' + - create + - delete + - update - apiGroups: - route.openshift.io resources: diff --git a/controllers/clusterpolicy_controller.go b/controllers/clusterpolicy_controller.go index d16d2d445..33e0ee587 100644 --- a/controllers/clusterpolicy_controller.go +++ b/controllers/clusterpolicy_controller.go @@ -68,11 +68,14 @@ type ClusterPolicyReconciler struct { // +kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions;proxies,verbs=get;list;watch // +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=use,resourceNames=privileged -// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles;clusterrolebindings;roles;rolebindings,verbs=* -// +kubebuilder:rbac:groups="",resources=namespaces;serviceaccounts;pods;pods/eviction;services;services/finalizers;endpoints,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims;events;configmaps;secrets;nodes,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=apps,resources=deployments;daemonsets;replicasets;statefulsets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles;clusterrolebindings;roles;rolebindings,verbs=create;update;delete +// +kubebuilder:rbac:groups="",resources=namespaces,verbs=get;patch +// +kubebuilder:rbac:groups="",resources=serviceaccounts;pods;pods/eviction;services;services/finalizers;endpoints,verbs=get;list;watch;create;update;patch;delete +// 
+kubebuilder:rbac:groups="",resources=persistentvolumeclaims;events;configmaps;secrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=apps,resources=deployments;replicasets;statefulsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps,resources=controllerrevisions,verbs=get;list;watch +// +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch // +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=scheduling.k8s.io,resources=priorityclasses,verbs=get;list;watch;create // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch diff --git a/controllers/object_controls.go b/controllers/object_controls.go index b436bcab1..51ba47f35 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -4065,13 +4065,14 @@ func ocpHasDriverToolkitImageStream(n *ClusterPolicyController) (bool, error) { } func (n ClusterPolicyController) cleanupAllDriverDaemonSets(ctx context.Context) error { - // Get all DaemonSets owned by ClusterPolicy - // - // (cdesiniotis) There is a limitation with the controller-runtime client where only a single field selector - // is allowed when specifying ListOptions or DeleteOptions. 
- // See GH issue: https://github.com/kubernetes-sigs/controller-runtime/issues/612 + // Get all DaemonSets owned by ClusterPolicy in operator namespace list := &appsv1.DaemonSetList{} - err := n.client.List(ctx, list, client.MatchingFields{clusterPolicyControllerIndexKey: n.singleton.Name}) + err := n.client.List( + ctx, + list, + client.MatchingFields{clusterPolicyControllerIndexKey: n.singleton.Name}, + client.InNamespace(n.operatorNamespace), + ) if err != nil { return fmt.Errorf("failed to list all NVIDIA driver daemonsets owned by ClusterPolicy: %w", err) } @@ -4099,6 +4100,7 @@ func (n ClusterPolicyController) cleanupStalePrecompiledDaemonsets(ctx context.C client.MatchingLabels{ precompiledIdentificationLabelKey: precompiledIdentificationLabelValue, }, + client.InNamespace(n.operatorNamespace), } list := &appsv1.DaemonSetList{} err := n.client.List(ctx, list, opts...) @@ -4243,6 +4245,7 @@ func (n ClusterPolicyController) ocpCleanupStaleDriverToolkitDaemonSets(ctx cont client.MatchingLabels{ ocpDriverToolkitIdentificationLabel: ocpDriverToolkitIdentificationValue, }, + client.InNamespace(n.operatorNamespace), } list := &appsv1.DaemonSetList{} @@ -4426,7 +4429,7 @@ func (n ClusterPolicyController) cleanupUnusedDriverDaemonSets(ctx context.Conte // pairs If no error happens, returns the number of Pods belonging to // the DaemonSet. 
func (n ClusterPolicyController) cleanupDriverDaemonsets(ctx context.Context, searchKey string, searchValue string, namePrefix string) (int, error) { - var opts = []client.ListOption{client.MatchingLabels{searchKey: searchValue}} + var opts = []client.ListOption{client.MatchingLabels{searchKey: searchValue}, client.InNamespace(n.operatorNamespace)} dsList := &appsv1.DaemonSetList{} if err := n.client.List(ctx, dsList, opts...); err != nil { diff --git a/controllers/upgrade_controller.go b/controllers/upgrade_controller.go index 7481239d1..7aba3748d 100644 --- a/controllers/upgrade_controller.go +++ b/controllers/upgrade_controller.go @@ -74,7 +74,7 @@ const ( // +kubebuilder:rbac:groups=mellanox.com,resources=*,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;update;patch // +kubebuilder:rbac:groups="",resources=pods,verbs=list -// +kubebuilder:rbac:groups=apps,resources=deployments;daemonsets;replicasets;statefulsets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=apps,resources=deployments;replicasets;statefulsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps,resources=deployments/finalizers,verbs=update // Reconcile is part of the main kubernetes reconciliation loop which aims to diff --git a/deployments/gpu-operator/templates/clusterrole.yaml b/deployments/gpu-operator/templates/clusterrole.yaml index 2af291e22..ad01f1008 100644 --- a/deployments/gpu-operator/templates/clusterrole.yaml +++ b/deployments/gpu-operator/templates/clusterrole.yaml @@ -43,11 +43,7 @@ rules: - clusterrolebindings verbs: - create - - get - - list - - watch - update - - patch - delete - apiGroups: - "" @@ -65,9 +61,6 @@ rules: - namespaces verbs: - get - - list - - watch - - update - patch - apiGroups: - "" diff --git a/deployments/gpu-operator/templates/role.yaml b/deployments/gpu-operator/templates/role.yaml index dc4674c57..f1dc0253b 100644 --- 
a/deployments/gpu-operator/templates/role.yaml +++ b/deployments/gpu-operator/templates/role.yaml @@ -14,11 +14,7 @@ rules: - rolebindings verbs: - create - - get - - list - - watch - update - - patch - delete - apiGroups: - apps @@ -50,7 +46,6 @@ rules: - secrets - services - services/finalizers - - serviceaccounts verbs: - create - get @@ -59,6 +54,18 @@ rules: - update - patch - delete +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - coordination.k8s.io resources: diff --git a/deployments/gpu-operator/templates/validating-admission-policies.yaml b/deployments/gpu-operator/templates/validating-admission-policies.yaml new file mode 100644 index 000000000..95c9a5594 --- /dev/null +++ b/deployments/gpu-operator/templates/validating-admission-policies.yaml @@ -0,0 +1,141 @@ +{{- if .Capabilities.APIVersions.Has "admissionregistration.k8s.io/v1/ValidatingAdmissionPolicy" }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: gpu-operator-runtimeclass-handler-policy + labels: + {{- include "gpu-operator.labels" . 
| nindent 4 }} + app.kubernetes.io/component: "gpu-operator" +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: + - node.k8s.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - runtimeclasses + matchConditions: + - name: gpu-operator-service-account + expression: request.userInfo.username == 'system:serviceaccount:{{ .Release.Namespace }}:gpu-operator' + validations: + - expression: >- + object.handler in ['nvidia', 'nvidia-cdi', 'nvidia-legacy', '{{ .Values.operator.runtimeClass }}'{{- range $runtimeClass := (default (list) .Values.kataManager.config.runtimeClasses) }}{{- if $runtimeClass.name }}, '{{ $runtimeClass.name }}'{{- end }}{{- end }}] + message: runtimeclass handler must be one of the allowed runtime classes configured by the chart +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: gpu-operator-runtimeclass-handler-policy-binding + labels: + {{- include "gpu-operator.labels" . | nindent 4 }} + app.kubernetes.io/component: "gpu-operator" +spec: + policyName: gpu-operator-runtimeclass-handler-policy + validationActions: + - Deny +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: gpu-operator-namespace-label-policy + labels: + {{- include "gpu-operator.labels" . 
| nindent 4 }} + app.kubernetes.io/component: "gpu-operator" +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - UPDATE + resources: + - namespaces + matchConditions: + - name: gpu-operator-service-account + expression: request.userInfo.username == 'system:serviceaccount:{{ .Release.Namespace }}:gpu-operator' + - name: target-namespace + expression: object.metadata.name in ['{{ .Release.Namespace }}', 'nvidia-gpu-operator'] + validations: + - expression: >- + (!has(oldObject.metadata.labels) || + oldObject.metadata.labels.all(k, + (has(object.metadata.labels) && k in object.metadata.labels && object.metadata.labels[k] == oldObject.metadata.labels[k]) || + k.startsWith('pod-security.kubernetes.io/') || + k == 'openshift.io/cluster-monitoring' + )) && + (!has(object.metadata.labels) || + object.metadata.labels.all(k, + (has(oldObject.metadata.labels) && k in oldObject.metadata.labels && oldObject.metadata.labels[k] == object.metadata.labels[k]) || + k.startsWith('pod-security.kubernetes.io/') || + k == 'openshift.io/cluster-monitoring' + )) + message: only pod-security.kubernetes.io/* and openshift.io/cluster-monitoring labels may be added, modified, or removed by gpu-operator +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: gpu-operator-namespace-label-policy-binding + labels: + {{- include "gpu-operator.labels" . | nindent 4 }} + app.kubernetes.io/component: "gpu-operator" +spec: + policyName: gpu-operator-namespace-label-policy + validationActions: + - Deny +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: gpu-operator-node-label-policy + labels: + {{- include "gpu-operator.labels" . 
| nindent 4 }} + app.kubernetes.io/component: "gpu-operator" +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - UPDATE + resources: + - nodes + matchConditions: + - name: gpu-operator-service-account + expression: request.userInfo.username == 'system:serviceaccount:{{ .Release.Namespace }}:gpu-operator' + validations: + - expression: >- + (!has(oldObject.metadata.labels) || + oldObject.metadata.labels.all(k, + (has(object.metadata.labels) && k in object.metadata.labels && object.metadata.labels[k] == oldObject.metadata.labels[k]) || + k.startsWith('nvidia.com/') + )) && + (!has(object.metadata.labels) || + object.metadata.labels.all(k, + (has(oldObject.metadata.labels) && k in oldObject.metadata.labels && oldObject.metadata.labels[k] == object.metadata.labels[k]) || + k.startsWith('nvidia.com/') + )) + message: only nvidia.com/* labels may be added, modified, or removed by gpu-operator on nodes +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: gpu-operator-node-label-policy-binding + labels: + {{- include "gpu-operator.labels" . | nindent 4 }} + app.kubernetes.io/component: "gpu-operator" +spec: + policyName: gpu-operator-node-label-policy + validationActions: + - Deny +{{- end }}