generate manifests

substratusai · Dec 3, 2024 · 6a77c00 · 6a77c00
1 parent 63ee3e0
commit 6a77c00
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 0 deletions.
diff --git a/manifests/models/qwen2.5-7b-cpu.yaml b/manifests/models/qwen2.5-7b-cpu.yaml
@@ -0,0 +1,11 @@
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: qwen2.5-7b-cpu
+spec:
+  features: [TextGeneration]
+  owner: ""  # explicit empty string; a bare `owner:` would parse as null
+  url: ollama://qwen2.5:7b  # model reference using the ollama:// scheme
+  engine: OLlama
+  resourceProfile: cpu:2  # NOTE(review): looks like <profile>:<count> - confirm
diff --git a/manifests/models/qwen2.5-coder-1.5b-cpu.yaml b/manifests/models/qwen2.5-coder-1.5b-cpu.yaml
@@ -0,0 +1,11 @@
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: qwen2.5-coder-1.5b-cpu
+spec:
+  features: [TextGeneration]
+  owner: ""  # explicit empty string; a bare `owner:` would parse as null
+  url: ollama://qwen2.5-coder:1.5b  # model reference using the ollama:// scheme
+  engine: OLlama
+  resourceProfile: cpu:1  # NOTE(review): looks like <profile>:<count> - confirm
diff --git a/manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml b/manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml
@@ -0,0 +1,19 @@
+# Source: models/templates/models.yaml
+apiVersion: kubeai.org/v1
+kind: Model
+metadata:
+  name: qwen2.5-coder-1.5b-rtx4070-8gb
+spec:
+  features: [TextGeneration]
+  owner: ""  # explicit empty string; a bare `owner:` would parse as null
+  url: hf://Qwen/Qwen2.5-Coder-1.5B-Instruct  # model reference using the hf:// scheme
+  engine: VLLM
+  args:  # NOTE(review): presumably forwarded to the engine as CLI flags - confirm
+    - --max-model-len=2048
+    - --max-num-seqs=16
+    - --quantization=fp8
+    - --kv-cache-dtype=fp8
+  env:  # NOTE(review): presumably set as environment variables on the engine - confirm
+    VLLM_ATTENTION_BACKEND: FLASHINFER
+  minReplicas: 1
+  resourceProfile: nvidia-gpu-rtx4070-8gb:1  # NOTE(review): looks like <profile>:<count> - confirm