Skip to content

Commit

Permalink
Added support for llama2-chat and codellama models (#43)
Browse files Browse the repository at this point in the history
* Added support for llama2-chat and codellama models

* Updated documentation link in README.md
  • Loading branch information
AyushSawant18588 authored Dec 7, 2023
1 parent f0ab70f commit 0ced78f
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ This new solution includes:
- The management interface for enhanced terminal UI or standard CLI
- Support for a curated set of LLMs including Llama2, Falcon and MPT

Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/kubernetes/getting_started/) to deploy and validate the inference server on Kubernetes cluster
Refer to the official [GPT-in-a-Box Documentation](https://opendocs.nutanix.com/gpt-in-a-box/overview/) to deploy and validate the inference server on Kubernetes cluster

### License
All source code and other contents in this repository are covered by the Nutanix License and Services Agreement, which is located at https://www.nutanix.com/legal/eula
14 changes: 5 additions & 9 deletions llm/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,15 @@ class with relevant information.
)
else:
print(
"## If you want to create a model archive file with the supported models, "
"## If you want to create a model archive file for supported models, "
            "make sure your model name is present in the list below: "
)
print(list(models.keys()))
print(
"If you want to create a model archive file for a custom model, there "
"are two methods:\n"
"1. If you have already downloaded the custom model files, please include"
" the --skip_download flag and provide the model_path directory which contains "
"the model files.\n"
"2. If you need to download the model files, provide the HuggingFace "
            "repository ID along with a model_path directory where the model "
"files are to be downloaded."
"\nIf you want to create a model archive file for"
" either a Custom Model or other HuggingFace models, "
"refer to the official GPT-in-a-Box documentation: "
"https://opendocs.nutanix.com/gpt-in-a-box/overview/"
)
sys.exit(1)

Expand Down
46 changes: 28 additions & 18 deletions llm/model_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
}
},
"falcon_7b": {
"repo_id": "tiiuae/falcon-7b",
Expand All @@ -30,8 +29,7 @@
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
}
},
"llama2_7b": {
"repo_id": "meta-llama/Llama-2-7b-hf",
Expand All @@ -47,24 +45,36 @@
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
},
"phi-1_5": {
"repo_id": "microsoft/phi-1_5",
"repo_version": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef",
"handler": "handler.py",
"registration_params":{
"batch_size" : 1,
"max_batch_delay" : 200,
"response_timeout" : 2000
},
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
}
},
"gpt2": {
"repo_id": "gpt2",
"repo_version": "11c5a3d5811f50298f278a704980280950aedb10",
"handler": "handler.py",
"gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"]
"registration_params": {
"batch_size": 1,
"max_batch_delay": 200,
"response_timeout": 2000
}
},
"codellama_7b_python": {
"repo_id": "codellama/CodeLlama-7b-Python-hf",
"repo_version": "7ee7b6beb0dece09b0431ea46c03bc1724e21572",
"handler": "handler.py",
"registration_params": {
"batch_size": 1,
"max_batch_delay": 200,
"response_timeout": 2000
}
},
"llama2_7b_chat": {
"repo_id": "meta-llama/Llama-2-7b-chat-hf",
"repo_version": "94b07a6e30c3292b8265ed32ffdeccfdadf434a8",
"handler": "handler.py",
"registration_params": {
"batch_size": 1,
"max_batch_delay": 200,
"response_timeout": 2000
}
}
}

0 comments on commit 0ced78f

Please sign in to comment.