From 96750d5e83302909cbce25a1f35372f5fcb77064 Mon Sep 17 00:00:00 2001 From: Dylan Date: Wed, 11 Dec 2024 09:41:06 -0500 Subject: [PATCH] nerc-ocp-prod: Add RHOAI GPU acceleratorProfiles Adds unique acceleratorProfiles for each GPU type in the production cluster. These profiles will show up under accelerators in the RHOAI workbench wizard. Part of: https://github.com/nerc-project/operations/issues/849 Closes: https://github.com/nerc-project/operations/issues/847 --- .../a100-acceleratorprofile.yaml | 14 ++++++++++++++ .../rhoai/acceleratorprofiles/kustomization.yaml | 6 ++++++ .../v100-acceleratorprofile.yaml | 14 ++++++++++++++ .../nerc-ocp-prod/rhoai/kustomization.yaml | 1 + 4 files changed, 35 insertions(+) create mode 100644 cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/a100-acceleratorprofile.yaml create mode 100644 cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/kustomization.yaml create mode 100644 cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/v100-acceleratorprofile.yaml diff --git a/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/a100-acceleratorprofile.yaml b/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/a100-acceleratorprofile.yaml new file mode 100644 index 00000000..4faddd40 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/a100-acceleratorprofile.yaml @@ -0,0 +1,14 @@ +apiVersion: dashboard.opendatahub.io/v1 +kind: AcceleratorProfile +metadata: + name: nvidia-a100-gpu + namespace: redhat-ods-applications +spec: + displayName: NVIDIA A100 GPU + enabled: true + identifier: nvidia.com/gpu + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu.product + operator: Equal + value: NVIDIA-A100-SXM4-40GB diff --git a/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/kustomization.yaml new file mode 100644 index 00000000..8094ae5a --- /dev/null +++ 
b/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - a100-acceleratorprofile.yaml + - v100-acceleratorprofile.yaml diff --git a/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/v100-acceleratorprofile.yaml b/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/v100-acceleratorprofile.yaml new file mode 100644 index 00000000..f8e9484a --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-prod/rhoai/acceleratorprofiles/v100-acceleratorprofile.yaml @@ -0,0 +1,14 @@ +apiVersion: dashboard.opendatahub.io/v1 +kind: AcceleratorProfile +metadata: + name: nvidia-v100-gpu + namespace: redhat-ods-applications +spec: + displayName: NVIDIA V100 GPU + enabled: true + identifier: nvidia.com/gpu + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu.product + operator: Equal + value: Tesla-V100-PCIE-32GB diff --git a/cluster-scope/overlays/nerc-ocp-prod/rhoai/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-prod/rhoai/kustomization.yaml index 3350eb1f..439e040b 100644 --- a/cluster-scope/overlays/nerc-ocp-prod/rhoai/kustomization.yaml +++ b/cluster-scope/overlays/nerc-ocp-prod/rhoai/kustomization.yaml @@ -4,3 +4,4 @@ resources: - odhdashboardconfigs/odh-dashboard-config.yaml - imagestreams/ucsls-f24-imagestream.yaml - imagestreams/oauth-proxy.yaml +- acceleratorprofiles