Skip to content

Commit

Permalink
Added support for llama2-chat and codellama models (#43)
Browse files Browse the repository at this point in the history
* Added support for llama2-chat and codellama models

* Updated documentation link in README.md
  • Loading branch information
AyushSawant18588 authored Dec 7, 2023
1 parent f0ab70f commit 0ced78f
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ This new solution includes:
- The management interface for enhanced terminal UI or standard CLI
- Support for a curated set of LLMs including Llama2, Falcon and MPT

Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/kubernetes/getting_started/) to deploy and validate the inference server on Kubernetes cluster
Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/overview/) to deploy and validate the inference server on Kubernetes cluster

### License
All source code and other contents in this repository are covered by the Nutanix License and Services Agreement, which is located at https://www.nutanix.com/legal/eula
14 changes: 5 additions & 9 deletions llm/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,15 @@ class with relevant information.
)
else:
print(
"## If you want to create a model archive file with the supported models, "
"## If you want to create a model archive file for supported models, "
            "make sure your model name is present in the list below: "
)
print(list(models.keys()))
print(
"If you want to create a model archive file for a custom model, there "
"are two methods:\n"
"1. If you have already downloaded the custom model files, please include"
" the --skip_download flag and provide the model_path directory which contains "
"the model files.\n"
"2. If you need to download the model files, provide the HuggingFace "
            "repository ID along with a model_path directory where the model "
"files are to be downloaded."
"\nIf you want to create a model archive file for"
" either a Custom Model or other HuggingFace models, "
"refer to the official GPT-in-a-Box documentation: "
"https://opendocs.nutanix.com/gpt-in-a-box/overview/"
)
sys.exit(1)

Expand Down
46 changes: 28 additions & 18 deletions llm/model_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
}
},
"falcon_7b": {
"repo_id": "tiiuae/falcon-7b",
Expand All @@ -30,8 +29,7 @@
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
}
},
"llama2_7b": {
"repo_id": "meta-llama/Llama-2-7b-hf",
Expand All @@ -47,24 +45,36 @@
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
},
"phi-1_5": {
"repo_id": "microsoft/phi-1_5",
"repo_version": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef",
"handler": "handler.py",
"registration_params":{
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
}
},
"gpt2": {
"repo_id": "gpt2",
"repo_version": "11c5a3d5811f50298f278a704980280950aedb10",
"handler": "handler.py",
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
"registration_params": {
"batch_size": 1,
"max_batch_delay": 200,
"response_timeout": 2000
}
},
"codellama_7b_python": {
"repo_id": "codellama/CodeLlama-7b-Python-hf",
"repo_version": "7ee7b6beb0dece09b0431ea46c03bc1724e21572",
"handler": "handler.py",
"registration_params": {
"batch_size": 1,
"max_batch_delay": 200,
"response_timeout": 2000
}
},
"llama2_7b_chat": {
"repo_id": "meta-llama/Llama-2-7b-chat-hf",
"repo_version": "94b07a6e30c3292b8265ed32ffdeccfdadf434a8",
"handler": "handler.py",
"registration_params": {
"batch_size": 1,
"max_batch_delay": 200,
"response_timeout": 2000
}
}
}

0 comments on commit 0ced78f

Please sign in to comment.