Skip to content

Commit

Permalink
add: nim
Browse files Browse the repository at this point in the history
  • Loading branch information
codekow committed Nov 18, 2024
1 parent 2dc5315 commit a06a347
Show file tree
Hide file tree
Showing 14 changed files with 199 additions and 1 deletion.
19 changes: 19 additions & 0 deletions components/app-configs/nim/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Notes

## Quickstart

Query the NIM OpenAI-compatible completions API:

```sh
# Build the completions endpoint from the host of the `nim` Route.
URL=https://$(oc get route -o go-template='{{.spec.host}}' nim)/v1/completions

# POST a completion request and print only the generated text.
# The jq filter is quoted so the shell never treats `[0]` as a glob pattern
# (an unquoted filter fails in zsh with "no matches found").
curl -s -X 'POST' \
  "${URL}" \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "meta/llama3-8b-instruct",
    "prompt": "Once upon a time",
    "max_tokens": 64
  }' | jq '.choices[0].text'
```
69 changes: 69 additions & 0 deletions components/app-configs/nim/base/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
---
# Deployment running the NVIDIA NIM container for meta/llama3-8b-instruct.
#
# spec.selector and the pod template labels are not declared here: this
# manifest is meant to be rendered through the base kustomization, whose
# `labels` entry (component: nim, includeSelectors: true) supplies them.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama3
spec:
  replicas: 1
  # Recreate stops the old pod before starting its replacement.
  # NOTE(review): presumably chosen because the `nim` PVC is ReadWriteOnce.
  strategy:
    type: Recreate
  template:
    spec:
      containers:
        - name: nim
          image: 'nvcr.io/nim/meta/llama3-8b-instruct:1.0.0'
          env:
            # Placeholder only. Never commit a real key here; override it per
            # environment, ideally with valueFrom.secretKeyRef.
            - name: NGC_API_KEY
              value: your-key-here
          ports:
            - containerPort: 8000
              protocol: TCP
              name: api
          resources:
            limits:
              cpu: '2'
              memory: 8Gi
            requests:
              cpu: 500m
              memory: 8Gi
          # Keep the pod out of the Service endpoints until the model server
          # reports ready. A failing readiness probe never restarts the
          # container; it only gates traffic.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: api
              scheme: HTTP
            timeoutSeconds: 1
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          # The probes below can restart the container, so they stay disabled.
          # They reference the declared `api` port so that they work as
          # written if enabled. Size the startup budget
          # (periodSeconds * failureThreshold) to cover the first model
          # download before turning them on.
          # livenessProbe:
          #   httpGet:
          #     path: /v1/health/live
          #     port: api
          #     scheme: HTTP
          #   timeoutSeconds: 5
          #   periodSeconds: 10
          #   successThreshold: 1
          #   failureThreshold: 3
          # startupProbe:
          #   httpGet:
          #     path: /v1/health/ready
          #     port: api
          #     scheme: HTTP
          #   timeoutSeconds: 1
          #   periodSeconds: 10
          #   successThreshold: 1
          #   failureThreshold: 180
          volumeMounts:
            # Memory-backed /dev/shm (see the `shm` volume below).
            - name: shm
              mountPath: /dev/shm
            # Cache directory persisted on the `nim` PVC, under `data/`.
            - name: nim-cache
              mountPath: /opt/nim/.cache
              subPath: data
      volumes:
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 512Mi
        - name: nim-cache
          persistentVolumeClaim:
            claimName: nim
13 changes: 13 additions & 0 deletions components/app-configs/nim/base/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Base kustomization for the NIM app: the four core resources, all labelled
# `component: nim`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - deployment.yaml
  - pvc.yaml
  - route.yaml
  - service.yaml

# includeSelectors also writes the label into selectors, which is what ties
# the Service and the Deployment to the pods.
labels:
  - pairs:
      component: nim
    includeSelectors: true
11 changes: 11 additions & 0 deletions components/app-configs/nim/base/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Persistent storage claimed by the Deployment's `nim-cache` volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nim
spec:
  resources:
    requests:
      storage: 10Gi
  accessModes:
    - ReadWriteOnce
18 changes: 18 additions & 0 deletions components/app-configs/nim/base/route.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# Edge-terminated Route exposing the NIM API outside the cluster.
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: nim
  annotations:
    argocd.argoproj.io/sync-wave: "1"
spec:
  port:
    # Port 8000 is the `api` port published by the `nim` Service.
    targetPort: 8000
  tls:
    # TLS ends at the router; plain-HTTP requests are redirected to HTTPS.
    insecureEdgeTerminationPolicy: Redirect
    termination: edge
  to:
    kind: Service
    # Must equal metadata.name of the Service in service.yaml. The previous
    # value `api` was the Service's port name, not a Service.
    name: nim
    weight: 100
  wildcardPolicy: None
12 changes: 12 additions & 0 deletions components/app-configs/nim/base/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# ClusterIP Service publishing the NIM API on port 8000.
# No selector is declared here; the base kustomization's `labels` entry
# (includeSelectors: true) is expected to add `component: nim`.
apiVersion: v1
kind: Service
metadata:
  name: nim
spec:
  type: ClusterIP
  sessionAffinity: None
  ports:
    - name: api
      port: 8000
      targetPort: 8000
7 changes: 7 additions & 0 deletions components/app-configs/nim/components/gpu/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Optional component: applies the GPU patch to every Deployment in the
# including kustomization (the target selects by kind only).
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

patches:
  - target:
      kind: Deployment
    path: patch-deployment.yaml
12 changes: 12 additions & 0 deletions components/app-configs/nim/components/gpu/patch-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Strategic-merge patch that adds one NVIDIA GPU to the `nim` container's
# resource limits.
apiVersion: apps/v1
kind: Deployment
metadata:
  # Matches the Deployment defined in base/deployment.yaml, so the patch
  # still resolves if it is ever applied without the kind-only target
  # selector in this component's kustomization.
  name: llama3
spec:
  template:
    spec:
      containers:
        # Merged into the base container by name.
        - name: nim
          resources:
            limits:
              nvidia.com/gpu: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Optional component that adds the Namespace manifest to the including
# kustomization.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

resources:
  - namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Namespace for the NIM demo; the annotation sets its OpenShift display name.
apiVersion: v1
kind: Namespace
metadata:
  annotations:
    openshift.io/display-name: "Demo - NIM"
  name: demo-nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Overlay that renders the base unchanged (no components, no namespace).
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - ../../base
11 changes: 11 additions & 0 deletions components/app-configs/nim/overlays/demo/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Demo overlay: the base plus the GPU and namespace components, with every
# resource placed in the `demo-nim` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: demo-nim

resources:
  - ../../base

components:
  - ../../components/gpu
  - ../../components/namespace
8 changes: 8 additions & 0 deletions components/app-configs/nim/overlays/gpu/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# GPU overlay: the base plus the GPU component; no namespace is set here.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - ../../base

components:
  - ../../components/gpu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ spec:
- |
#!/bin/sh
jupyter lab \
--ServerApp.ip=0.0.0.0:8888 \
--ServerApp.ip=0.0.0.0 \
--ServerApp.port=8888 \
--ServerApp.allow_origin="*" \
--ServerApp.open_browser=False \
--ServerApp.token=''
Expand Down

0 comments on commit a06a347

Please sign in to comment.