Skip to content

Commit

Permalink
generate manifests
Browse files Browse the repository at this point in the history
  • Loading branch information
samos123 committed Dec 3, 2024
1 parent 63ee3e0 commit 6a77c00
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 0 deletions.
11 changes: 11 additions & 0 deletions manifests/models/qwen2.5-7b-cpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Source: models/templates/models.yaml
# KubeAI Model manifest: declares the model "qwen2.5-7b-cpu", pulled from the
# Ollama URL below and run with the OLlama engine.
apiVersion: kubeai.org/v1
kind: Model
metadata:
  name: qwen2.5-7b-cpu
spec:
  features: [TextGeneration]
  # The generated output left `owner` as a bare key (implicit null); the empty
  # value is made explicit here.
  # NOTE(review): confirm whether an owner should be set in the chart values.
  owner: ""
  url: 'ollama://qwen2.5:7b'
  engine: OLlama
  # Presumably "<resource profile name>:<count>" - confirm against the
  # resource profiles configured for the cluster.
  resourceProfile: 'cpu:2'
11 changes: 11 additions & 0 deletions manifests/models/qwen2.5-coder-1.5b-cpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Source: models/templates/models.yaml
# KubeAI Model manifest: declares the model "qwen2.5-coder-1.5b-cpu", pulled
# from the Ollama URL below and run with the OLlama engine.
apiVersion: kubeai.org/v1
kind: Model
metadata:
  name: qwen2.5-coder-1.5b-cpu
spec:
  features: [TextGeneration]
  # The generated output left `owner` as a bare key (implicit null); the empty
  # value is made explicit here.
  # NOTE(review): confirm whether an owner should be set in the chart values.
  owner: ""
  url: 'ollama://qwen2.5-coder:1.5b'
  engine: OLlama
  # Presumably "<resource profile name>:<count>" - confirm against the
  # resource profiles configured for the cluster.
  resourceProfile: 'cpu:1'
19 changes: 19 additions & 0 deletions manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Source: models/templates/models.yaml
# KubeAI Model manifest: declares the model "qwen2.5-coder-1.5b-rtx4070-8gb",
# pulled from the Hugging Face URL below and run with the VLLM engine.
apiVersion: kubeai.org/v1
kind: Model
metadata:
  name: qwen2.5-coder-1.5b-rtx4070-8gb
spec:
  features: [TextGeneration]
  # The generated output left `owner` as a bare key (implicit null); the empty
  # value is made explicit here.
  # NOTE(review): confirm whether an owner should be set in the chart values.
  owner: ""
  url: 'hf://Qwen/Qwen2.5-Coder-1.5B-Instruct'
  engine: VLLM
  # Engine command-line flags, passed through as written.
  # NOTE(review): the small context length / sequence count and fp8 settings
  # look chosen to fit the 8 GB GPU named in the resource profile - confirm.
  args:
    - --max-model-len=2048
    - --max-num-seqs=16
    - --quantization=fp8
    - --kv-cache-dtype=fp8
  # Environment variables for the engine.
  env:
    VLLM_ATTENTION_BACKEND: FLASHINFER
  minReplicas: 1
  # Presumably "<resource profile name>:<count>" - confirm against the
  # resource profiles configured for the cluster.
  resourceProfile: 'nvidia-gpu-rtx4070-8gb:1'

0 comments on commit 6a77c00

Please sign in to comment.