
Commit

add tokenizer for codellama 70b
mmoskal committed Feb 23, 2024
1 parent 1b5691c · commit ae9c35d
Showing 2 changed files with 10 additions and 0 deletions.
6 changes: 6 additions & 0 deletions aicirt/src/bintokens.rs
@@ -43,6 +43,12 @@ pub fn tokenizers() -> Vec<TokenizerInfo> {
             hf_model: "codellama/CodeLlama-13b-Instruct-hf",
             model_ids: "codellama-13b",
         },
+        TokenizerInfo {
+            name: "llama70",
+            description: "used by codellama-70b; with <step> token",
+            hf_model: "codellama/CodeLlama-70b-Instruct-hf",
+            model_ids: "codellama-70b",
+        },
         TokenizerInfo {
             name: "orca",
             description: "llama",
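For orientation, the new entry above simply extends the static list returned by tokenizers(). The standalone sketch below shows how such an entry could be resolved from a model name; the TokenizerInfo fields mirror the diff, but this struct definition and the find_tokenizer helper are illustrative assumptions, not the actual aicirt API.

// Illustrative sketch only (assumed, not aicirt's real code): resolve a
// registry entry such as the new "llama70" one by name or model id.
struct TokenizerInfo {
    name: &'static str,
    description: &'static str,
    hf_model: &'static str,
    model_ids: &'static str,
}

fn tokenizers() -> Vec<TokenizerInfo> {
    vec![TokenizerInfo {
        name: "llama70",
        description: "used by codellama-70b; with <step> token",
        hf_model: "codellama/CodeLlama-70b-Instruct-hf",
        model_ids: "codellama-70b",
    }]
}

// Hypothetical lookup: match the short name or any comma-separated model id.
fn find_tokenizer(query: &str) -> Option<TokenizerInfo> {
    tokenizers()
        .into_iter()
        .find(|t| t.name == query || t.model_ids.split(',').any(|id| id.trim() == query))
}

fn main() {
    if let Some(t) = find_tokenizer("codellama-70b") {
        println!("{} ({}): {}", t.name, t.description, t.hf_model);
    }
}
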
4 changes: 4 additions & 0 deletions rllm/rllm-cuda/server.sh
@@ -69,6 +69,9 @@ if [ "$CPP" = 1 ] ; then
     mixtral )
         ARGS="-m https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/blob/main/mixtral-8x7b-instruct-v0.1.Q6_K.gguf"
         ;;
+    code70 )
+        ARGS="-m https://huggingface.co/TheBloke/CodeLlama-70B-Instruct-GGUF/blob/main/codellama-70b-instruct.Q5_K_M.gguf"
+        ;;
     https* )
         ARGS="-m $1"
         ;;
@@ -86,6 +89,7 @@ model_name can a HuggingFace URL pointing to a .gguf file, or one of the followi
 orca https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf
 mistral https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/blob/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf
 mixtral https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/blob/main/mixtral-8x7b-instruct-v0.1.Q6_K.gguf
+code70 https://huggingface.co/TheBloke/CodeLlama-70B-Instruct-GGUF/blob/main/codellama-70b-instruct.Q5_K_M.gguf
 Additionally, "$SELF build" will just build the server, and not run a model.
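With both changes in place, launching the new model should presumably come down to passing the new key to the launcher, along the lines of "./server.sh code70", which the case statement above expands to the TheBloke CodeLlama-70B-Instruct GGUF URL (a hedged reading of the diff, not a tested invocation).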
