diff --git a/README.md b/README.md
index 0c3c19b..61fa6ae 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ This new solution includes:
 - The management interface for enhanced terminal UI or standard CLI
 - Support for a curated set of LLMs including Llama2, Falcon and MPT
 
-Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/kubernetes/getting_started/) to deploy and validate the inference server on Kubernetes cluster
+Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/overview/) to deploy and validate the inference server on Kubernetes cluster
 
 ### License
 All source code and other contents in this repository are covered by the Nutanix License and Services Agreement, which is located at https://www.nutanix.com/legal/eula
diff --git a/llm/generate.py b/llm/generate.py
index 0df68dc..1848868 100644
--- a/llm/generate.py
+++ b/llm/generate.py
@@ -242,19 +242,15 @@ class with relevant information.
         )
     else:
         print(
-            "## If you want to create a model archive file with the supported models, "
+            "## If you want to create a model archive file for supported models, "
            "make sure you're model name is present in the below : "
         )
         print(list(models.keys()))
         print(
-            "If you want to create a model archive file for a custom model, there "
-            "are two methods:\n"
-            "1. If you have already downloaded the custom model files, please include"
-            " the --skip_download flag and provide the model_path directory which contains "
-            "the model files.\n"
-            "2. If you need to download the model files, provide the HuggingFace "
-            "repository ID along with a model_path driectory where the model "
-            "files are to be downloaded."
+            "\nIf you want to create a model archive file for"
+            " either a Custom Model or other HuggingFace models, "
+            "refer to the official GPT-in-a-Box documentation: "
+            "https://opendocs.nutanix.com/gpt-in-a-box/overview/"
         )
         sys.exit(1)
 
diff --git a/llm/model_config.json b/llm/model_config.json
index 9a2c9ba..cc23400 100644
--- a/llm/model_config.json
+++ b/llm/model_config.json
@@ -13,8 +13,7 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "falcon_7b": {
         "repo_id": "tiiuae/falcon-7b",
@@ -30,8 +29,7 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "llama2_7b": {
         "repo_id": "meta-llama/Llama-2-7b-hf",
@@ -47,24 +45,36 @@
             "batch_size" : 1,
             "max_batch_delay" : 200,
             "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
-    },
-    "phi-1_5": {
-        "repo_id": "microsoft/phi-1_5",
-        "repo_version": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef",
-        "handler": "handler.py",
-        "registration_params":{
-            "batch_size" : 1,
-            "max_batch_delay" : 200,
-            "response_timeout" : 2000
-        },
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        }
     },
     "gpt2": {
         "repo_id": "gpt2",
         "repo_version": "11c5a3d5811f50298f278a704980280950aedb10",
         "handler": "handler.py",
-        "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
+    },
+    "codellama_7b_python": {
+        "repo_id": "codellama/CodeLlama-7b-Python-hf",
+        "repo_version": "7ee7b6beb0dece09b0431ea46c03bc1724e21572",
+        "handler": "handler.py",
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
+    },
+    "llama2_7b_chat": {
+        "repo_id": "meta-llama/Llama-2-7b-chat-hf",
+        "repo_version": "94b07a6e30c3292b8265ed32ffdeccfdadf434a8",
+        "handler": "handler.py",
+        "registration_params": {
+            "batch_size": 1,
+            "max_batch_delay": 200,
+            "response_timeout": 2000
+        }
     }
 }
\ No newline at end of file