-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
299 additions
and
2 deletions.
There are no files selected for viewing
8 changes: 8 additions & 0 deletions
8
...erators/gpu-operator-certified/instance/base/console-plugin-nvidia-gpu/kustomization.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Kustomization that bundles the console-plugin-nvidia-gpu manifests found under templates/. | ||
apiVersion: kustomize.config.k8s.io/v1beta1 | ||
kind: Kustomization | ||
|
||
# Only the four manifests below are built; templates/tests/test-plugin-service.yaml is not listed here. | ||
resources: | ||
- templates/configmap.yaml | ||
- templates/consoleplugin.yaml | ||
- templates/deployment.yaml | ||
- templates/service.yaml |
28 changes: 28 additions & 0 deletions
28
...s/gpu-operator-certified/instance/base/console-plugin-nvidia-gpu/templates/configmap.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
--- | ||
# Source: console-plugin-nvidia-gpu/templates/configmap.yaml | ||
# ConfigMap carrying a single dcgm-metrics.csv entry; each CSV row is: metric name, metric type, help text. | ||
apiVersion: v1 | ||
kind: ConfigMap | ||
metadata: | ||
name: release-name-console-plugin-nvidia-gpu | ||
labels: | ||
helm.sh/chart: console-plugin-nvidia-gpu-0.2.3 | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/version: "latest" | ||
app.kubernetes.io/managed-by: Helm | ||
app.kubernetes.io/component: console-plugin-nvidia-gpu | ||
# app.kubernetes.io/instance used to appear twice in this mapping (release-name, then this value). | ||
# Duplicate keys are invalid YAML; the later value, which last-wins parsers already used, is kept. | ||
app.kubernetes.io/instance: console-plugin-nvidia-gpu | ||
app.kubernetes.io/part-of: console-plugin-nvidia-gpu | ||
data: | ||
dcgm-metrics.csv: | | ||
DCGM_FI_PROF_GR_ENGINE_ACTIVE, gauge, gpu utilization. | ||
DCGM_FI_DEV_MEM_COPY_UTIL, gauge, mem utilization. | ||
DCGM_FI_DEV_ENC_UTIL, gauge, enc utilization. | ||
DCGM_FI_DEV_DEC_UTIL, gauge, dec utilization. | ||
DCGM_FI_DEV_POWER_USAGE, gauge, power usage. | ||
DCGM_FI_DEV_POWER_MGMT_LIMIT_MAX, gauge, power mgmt limit. | ||
DCGM_FI_DEV_GPU_TEMP, gauge, gpu temp. | ||
DCGM_FI_DEV_SM_CLOCK, gauge, sm clock. | ||
DCGM_FI_DEV_MAX_SM_CLOCK, gauge, max sm clock. | ||
DCGM_FI_DEV_MEM_CLOCK, gauge, mem clock. | ||
DCGM_FI_DEV_MAX_MEM_CLOCK, gauge, max mem clock. |
22 changes: 22 additions & 0 deletions
22
...u-operator-certified/instance/base/console-plugin-nvidia-gpu/templates/consoleplugin.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
--- | ||
# Source: console-plugin-nvidia-gpu/templates/consoleplugin.yaml | ||
# Registers the plugin with the OpenShift console and names the Service (name, namespace, port, base path) that serves it. | ||
apiVersion: console.openshift.io/v1alpha1 | ||
kind: ConsolePlugin | ||
metadata: | ||
name: release-name-console-plugin-nvidia-gpu | ||
labels: | ||
helm.sh/chart: console-plugin-nvidia-gpu-0.2.3 | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/version: "latest" | ||
app.kubernetes.io/managed-by: Helm | ||
app.kubernetes.io/component: console-plugin-nvidia-gpu | ||
# app.kubernetes.io/instance used to appear twice in this mapping (release-name, then this value). | ||
# Duplicate keys are invalid YAML; the later value, which last-wins parsers already used, is kept. | ||
app.kubernetes.io/instance: console-plugin-nvidia-gpu | ||
app.kubernetes.io/part-of: console-plugin-nvidia-gpu | ||
spec: | ||
displayName: 'Console Plugin NVIDIA GPU Template' | ||
service: | ||
name: release-name-console-plugin-nvidia-gpu | ||
# NOTE(review): hard-coded namespace; it must be the namespace the plugin Service is actually deployed to - confirm. | ||
namespace: sandbox | ||
port: 9443 | ||
basePath: '/' |
61 changes: 61 additions & 0 deletions
61
.../gpu-operator-certified/instance/base/console-plugin-nvidia-gpu/templates/deployment.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
--- | ||
# Source: console-plugin-nvidia-gpu/templates/deployment.yaml | ||
# Single-replica Deployment of the console plugin container, listening on 9443 with a serving certificate mounted from a Secret. | ||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: release-name-console-plugin-nvidia-gpu | ||
labels: | ||
helm.sh/chart: console-plugin-nvidia-gpu-0.2.3 | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/version: "latest" | ||
app.kubernetes.io/managed-by: Helm | ||
app.kubernetes.io/component: console-plugin-nvidia-gpu | ||
# app.kubernetes.io/instance used to appear twice in this mapping (release-name, then this value). | ||
# Duplicate keys are invalid YAML; the later value, which last-wins parsers already used, is kept. | ||
# The selector and pod template below carry their own instance label (release-name) and are unchanged. | ||
app.kubernetes.io/instance: console-plugin-nvidia-gpu | ||
app.kubernetes.io/part-of: console-plugin-nvidia-gpu | ||
app.openshift.io/runtime-namespace: console-plugin-nvidia-gpu | ||
spec: | ||
replicas: 1 | ||
selector: | ||
matchLabels: | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/instance: release-name | ||
template: | ||
metadata: | ||
labels: | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/instance: release-name | ||
spec: | ||
securityContext: | ||
runAsNonRoot: true | ||
containers: | ||
- name: console-plugin-nvidia-gpu | ||
image: "quay.io/edge-infrastructure/console-plugin-nvidia-gpu:latest" | ||
imagePullPolicy: Always | ||
securityContext: | ||
allowPrivilegeEscalation: false | ||
ports: | ||
- containerPort: 9443 | ||
protocol: TCP | ||
volumeMounts: | ||
- name: plugin-serving-cert | ||
readOnly: true | ||
mountPath: /var/serving-cert | ||
resources: | ||
{} | ||
volumes: | ||
- name: plugin-serving-cert | ||
secret: | ||
secretName: plugin-serving-cert | ||
# 420 decimal is file mode 0644 octal. | ||
defaultMode: 420 | ||
# NOTE(review): this volume is declared but no container mounts it, and the nginx-conf ConfigMap | ||
# is not among the manifests shown here - confirm it exists or drop the volume. | ||
- name: nginx-conf | ||
configMap: | ||
name: nginx-conf | ||
defaultMode: 420 | ||
restartPolicy: Always | ||
dnsPolicy: ClusterFirst | ||
strategy: | ||
type: RollingUpdate | ||
rollingUpdate: | ||
maxUnavailable: 25% | ||
maxSurge: 25% |
28 changes: 28 additions & 0 deletions
28
...ors/gpu-operator-certified/instance/base/console-plugin-nvidia-gpu/templates/service.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
--- | ||
# Source: console-plugin-nvidia-gpu/templates/service.yaml | ||
# ClusterIP Service exposing the plugin pods on TCP 9443; pods are selected by the name and instance labels below. | ||
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: release-name-console-plugin-nvidia-gpu | ||
labels: | ||
helm.sh/chart: console-plugin-nvidia-gpu-0.2.3 | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/version: "latest" | ||
app.kubernetes.io/managed-by: Helm | ||
app.kubernetes.io/component: console-plugin-nvidia-gpu | ||
# app.kubernetes.io/instance used to appear twice in this mapping (release-name, then this value). | ||
# Duplicate keys are invalid YAML; the later value, which last-wins parsers already used, is kept. | ||
# The spec.selector below keeps its own instance value (release-name) and is unchanged. | ||
app.kubernetes.io/instance: console-plugin-nvidia-gpu | ||
app.kubernetes.io/part-of: console-plugin-nvidia-gpu | ||
annotations: | ||
# Names the Secret that should hold the serving certificate for this Service. | ||
# NOTE(review): this is the alpha form of the annotation; current OpenShift docs use | ||
# service.beta.openshift.io/serving-cert-secret-name - confirm before switching. | ||
service.alpha.openshift.io/serving-cert-secret-name: plugin-serving-cert | ||
spec: | ||
ports: | ||
- name: 9443-tcp | ||
protocol: TCP | ||
port: 9443 | ||
targetPort: 9443 | ||
selector: | ||
app.kubernetes.io/name: console-plugin-nvidia-gpu | ||
app.kubernetes.io/instance: release-name | ||
type: ClusterIP | ||
sessionAffinity: None |
20 changes: 20 additions & 0 deletions
20
...ertified/instance/base/console-plugin-nvidia-gpu/templates/tests/test-plugin-service.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
--- | ||
# Source: console-plugin-nvidia-gpu/templates/tests/test-plugin-service.yaml | ||
# Helm test-hook Pod that requests plugin-manifest.json from the plugin Service over HTTPS. | ||
apiVersion: v1 | ||
kind: Pod | ||
metadata: | ||
name: "release-name-service-test" | ||
annotations: | ||
"helm.sh/hook": test | ||
spec: | ||
containers: | ||
- name: release-name-service-test | ||
image: quay.io/cilium/alpine-curl:v1.4.0 | ||
imagePullPolicy: "Always" | ||
# No command is set, so these are passed to the image's entrypoint (presumably curl - confirm). | ||
args: | ||
- -XGET | ||
- --silent | ||
- --fail | ||
- --insecure | ||
# NOTE(review): the URL hard-codes the sandbox namespace - confirm it matches where the Service runs. | ||
- https://release-name-console-plugin-nvidia-gpu.sandbox.svc:9443/plugin-manifest.json | ||
restartPolicy: Never |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
components/operators/gpu-operator-certified/instance/base/setup-console-plugin-job.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
--- | ||
# Permissions for the nvidia-gpu-console-setup Job's ServiceAccount. | ||
# Each rule corresponds to an oc call made by the setup script in the Job defined in this manifest. | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: ClusterRole | ||
metadata: | ||
name: nvidia-gpu-console-setup | ||
rules: | ||
# NOTE(review): all verbs on every ConfigMap is broad - confirm whether it can be narrowed. | ||
- apiGroups: | ||
- '' | ||
resources: | ||
- configmaps | ||
verbs: | ||
- '*' | ||
# Read and patch the cluster-wide console operator config (spec.plugins). | ||
- apiGroups: | ||
- operator.openshift.io | ||
resources: | ||
- consoles | ||
resourceNames: | ||
- cluster | ||
verbs: | ||
- '*' | ||
- apiGroups: | ||
- console.openshift.io | ||
resources: | ||
- consoleplugins | ||
resourceNames: | ||
- release-name-console-plugin-nvidia-gpu | ||
verbs: | ||
- '*' | ||
# The setup script runs `oc patch clusterpolicies.nvidia.com gpu-cluster-policy`; no rule granted that before. | ||
- apiGroups: | ||
- nvidia.com | ||
resources: | ||
- clusterpolicies | ||
resourceNames: | ||
- gpu-cluster-policy | ||
verbs: | ||
- get | ||
- patch | ||
# The setup script ends with `oc get deploy -l ...`, which needs get/list on Deployments. | ||
- apiGroups: | ||
- apps | ||
resources: | ||
- deployments | ||
verbs: | ||
- get | ||
- list | ||
--- | ||
# Grants the nvidia-gpu-console-setup ClusterRole cluster-wide to the ServiceAccount | ||
# nvidia-gpu-console-setup in the nvidia-gpu-operator namespace. | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: ClusterRoleBinding | ||
metadata: | ||
name: nvidia-gpu-console-setup | ||
roleRef: | ||
apiGroup: rbac.authorization.k8s.io | ||
kind: ClusterRole | ||
name: nvidia-gpu-console-setup | ||
subjects: | ||
- kind: ServiceAccount | ||
name: nvidia-gpu-console-setup | ||
namespace: nvidia-gpu-operator | ||
--- | ||
# Binds the nvidia-gpu-console-setup ClusterRole to the same ServiceAccount, scoped to openshift-config-managed. | ||
# NOTE(review): a ClusterRoleBinding with the same roleRef and subject also exists in this manifest, | ||
# which would make this namespaced binding redundant - confirm whether both are needed. | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: RoleBinding | ||
metadata: | ||
name: nvidia-gpu-console-setup | ||
namespace: openshift-config-managed | ||
roleRef: | ||
apiGroup: rbac.authorization.k8s.io | ||
kind: ClusterRole | ||
name: nvidia-gpu-console-setup | ||
subjects: | ||
- kind: ServiceAccount | ||
name: nvidia-gpu-console-setup | ||
namespace: nvidia-gpu-operator | ||
--- | ||
# ServiceAccount named as the subject of the RBAC bindings and as serviceAccountName of the setup Job in this manifest. | ||
apiVersion: v1 | ||
kind: ServiceAccount | ||
metadata: | ||
name: nvidia-gpu-console-setup | ||
namespace: nvidia-gpu-operator | ||
--- | ||
# One-shot Job, annotated as an Argo CD Sync hook, that installs and enables the NVIDIA GPU console plugin. | ||
apiVersion: batch/v1 | ||
kind: Job | ||
metadata: | ||
annotations: | ||
argocd.argoproj.io/hook: Sync | ||
# argocd.argoproj.io/hook-delete-policy: HookSucceeded | ||
# NOTE(review): both generateName and name are set; generateName only takes effect when name is empty - confirm which is intended. | ||
generateName: nvidia-gpu-console-setup- | ||
name: nvidia-gpu-console-setup | ||
namespace: nvidia-gpu-operator | ||
spec: | ||
template: | ||
spec: | ||
containers: | ||
- name: nvidia-gpu-console-setup | ||
image: image-registry.openshift-image-registry.svc:5000/openshift/cli:latest | ||
env: | ||
# Populated from the pod's own namespace; the script below does not read it. | ||
- name: NAMESPACE | ||
valueFrom: | ||
fieldRef: | ||
fieldPath: metadata.namespace | ||
command: | ||
- /bin/bash | ||
- -c | ||
- | | ||
#!/usr/bin/env bash | ||
set -x | ||
cd /tmp | ||
# Installs the plugin chart (when helm is available), enables the plugin in the console | ||
# operator config, then points the GPU ClusterPolicy's dcgmExporter at the plugin config. | ||
nvidia_setup_console_plugin(){ | ||
local enabled_plugins | ||
if which helm; then | ||
helm repo add rh-ecosystem-edge https://rh-ecosystem-edge.github.io/console-plugin-nvidia-gpu || true | ||
helm repo update > /dev/null 2>&1 | ||
helm upgrade --install -n nvidia-gpu-operator console-plugin-nvidia-gpu rh-ecosystem-edge/console-plugin-nvidia-gpu > /dev/null 2>&1 | ||
else | ||
# NOTE(review): without helm the function stops here and none of the oc steps below run - confirm that is intended. | ||
return | ||
fi | ||
# Read the enabled-plugin list once. The previous check only tested whether `oc get` succeeded, | ||
# so the merge patch always ran and replaced the whole list, dropping any other enabled plugin. | ||
if ! enabled_plugins="$(oc get consoles.operator.openshift.io cluster --output=jsonpath="{.spec.plugins}")"; then | ||
# A failed read must not be treated as an empty list, or the merge patch would overwrite it. | ||
echo "cannot read spec.plugins of consoles.operator.openshift.io/cluster; leaving it untouched" >&2 | ||
elif [ -z "${enabled_plugins}" ]; then | ||
# No list yet: create it with a merge patch (a JSON-patch append needs the array to exist). | ||
oc patch consoles.operator.openshift.io cluster --patch '{ "spec": { "plugins": ["console-plugin-nvidia-gpu"] } }' --type=merge | ||
elif ! echo "${enabled_plugins}" | grep -q console-plugin-nvidia-gpu; then | ||
# List exists without this plugin: append so the other entries are preserved. | ||
oc patch consoles.operator.openshift.io cluster --patch '[{"op": "add", "path": "/spec/plugins/-", "value": "console-plugin-nvidia-gpu" }]' --type=json | ||
fi | ||
# NOTE(review): the name console-plugin-nvidia-gpu used in these patches presumably matches the helm release's resources - confirm. | ||
oc patch clusterpolicies.nvidia.com gpu-cluster-policy --patch '{ "spec": { "dcgmExporter": { "config": { "name": "console-plugin-nvidia-gpu" } } } }' --type=merge | ||
oc -n nvidia-gpu-operator get deploy -l app.kubernetes.io/name=console-plugin-nvidia-gpu | ||
} | ||
nvidia_setup_console_plugin | ||
restartPolicy: Never | ||
terminationGracePeriodSeconds: 30 | ||
serviceAccount: nvidia-gpu-console-setup | ||
serviceAccountName: nvidia-gpu-console-setup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters