Enable LLM inference with llama.cpp and llama-cpp-python (#33)
* Add models tab ui for model conversion
* Add changes to models tab UI
* Add changes to models tab UI
* Feature/llamacpp (#22)
  * Initial download snapshot & covert to GGUF using LLama 🐑
  * Chat 💬 completion with llama cpp
  * added llama.cpp requirement
  * model conversion
  * HF snapshot download fix
  * Implement CMake build support and enhance text generation using gguf model.
  * Implement dynamic model path and make quantized_model directory
  * Add py_cmd to configs using Makefile
  * Add py_cmd to configs with cmake, dynamic python command for conversion

  ---------

  Co-authored-by: parveen kumar <[email protected]>
  Co-authored-by: Subhanshu0027 <[email protected]>
* Feat: download and convert, select model from models tab (#24)
  * Feat: download and convert, select model from models tab
  * Refactor: remove unused line
* Remove Converted gguf Models & Enhance UI in Models Tab (#26)
  * Add feature to remove converted gguf models & UI changes in models tab
  * Add remove model functionality to core.py
  * Optimize code formatting
  * Update README.md with new features
* Select model from chat tab (#25)
* feat: Add support for selecting execution provider, CPU or GPU with CUDA (#29)
* refactor: Remove all remnants of transformers inference logic and associated code, fix removeModel (#30)
* docs: Add installation and development tips, update (#32)
* fix: dropdown for initially empty saved_gguf_models_list (#34)
* fix: Model list not updating after download snapshot, remove model (#35)
  * fix: Model list not updating after download snapshot
  * fix: remove model

---------

Co-authored-by: Subhanshu0027 <[email protected]>
Co-authored-by: Subhanshu0027 <[email protected]>
Co-authored-by: Juggernaut <[email protected]>
1 parent b1b7053 · commit 01417f3

Showing 11 changed files with 399 additions and 161 deletions.
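For readers skimming the diff, here is a minimal llama-cpp-python completion sketch of the kind of inference this commit wires up. The GGUF path and prompt are hypothetical placeholders, not files from this repository, and the exact API surface may differ depending on the pinned llama-cpp-python version:

```python
# Illustrative only: load a converted GGUF model and run a completion with llama-cpp-python.
# The model path below is a hypothetical output of the conversion flow added in this commit.
from llama_cpp import Llama

llm = Llama(model_path="./src/quantized_model/org__model.gguf", n_ctx=2048)
result = llm("Q: What does GGUF stand for?\nA:", max_tokens=64, stop=["Q:"])
print(result["choices"][0]["text"])
```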
@@ -0,0 +1,42 @@

```cmake
# Minimum required CMake version
cmake_minimum_required(VERSION 3.15)

# Project name
project(llama_cpp)

# Git repository location
set(REPO_URL "https://github.com/ggerganov/llama.cpp")

# Requirements file
set(REQUIREMENTS_FILE "requirements.txt")

# Llama directory
set(LLAMA_DIR "${PROJECT_SOURCE_DIR}/src/llama_cpp")

# Check for Python and Git using CMake's FIND_PACKAGE
find_package(PythonLibs REQUIRED)
find_package(Git REQUIRED)

# Download and clone the llama.cpp repository
execute_process(
  COMMAND git clone ${REPO_URL} ${LLAMA_DIR}
  RESULT_VARIABLE git_result
)

# Error handling for Git clone
if(NOT ${git_result} EQUAL 0)
  message(FATAL_ERROR "Failed to clone llama.cpp repository")
endif()

# Install Python requirements
execute_process(
  COMMAND pip install -r "${LLAMA_DIR}/${REQUIREMENTS_FILE}"
)

file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/src/quantized_model")

find_program(PYTHON NAMES python python3 2>/dev/null)

if(PYTHON)
  file(APPEND "${PROJECT_SOURCE_DIR}/configs/config.ini" "py_cmd = ${PYTHON}")
endif()
```
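For reference, the same bootstrap steps this CMake script performs, sketched as plain Python. This is an illustrative paraphrase of the build file above, not code from the repository:

```python
# Illustrative paraphrase of the CMake bootstrap: clone llama.cpp, install its
# requirements, create the quantized_model directory, and record the Python command.
import pathlib
import shutil
import subprocess

repo_url = "https://github.com/ggerganov/llama.cpp"
llama_dir = pathlib.Path("src/llama_cpp")

subprocess.run(["git", "clone", repo_url, str(llama_dir)], check=True)
subprocess.run(["pip", "install", "-r", str(llama_dir / "requirements.txt")], check=True)
pathlib.Path("src/quantized_model").mkdir(parents=True, exist_ok=True)

python_cmd = shutil.which("python") or shutil.which("python3")
if python_cmd:
    with open("configs/config.ini", "a") as f:
        f.write(f"py_cmd = {python_cmd}")
```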
@@ -0,0 +1,43 @@ | ||
# Makefile to clone llama.cpp repository and install requirements | ||
|
||
# Variables | ||
REPO_URL := https://github.com/ggerganov/llama.cpp | ||
REQUIREMENTS_FILE := requirements.txt | ||
LLAMA_DIR := src/llama_cpp | ||
|
||
# Determine pip command | ||
PIP := $(shell command -v pip3 2>/dev/null || command -v pip) | ||
|
||
# Check if python and git are installed | ||
PYTHON := $(shell command -v python 2>/dev/null || command -v python3 2>/dev/null) | ||
GIT := $(shell command -v git) | ||
|
||
ifeq ($(PYTHON),) | ||
$(error Python is not installed. Please install Python before running this Makefile.) | ||
endif | ||
|
||
ifeq ($(GIT),) | ||
$(error Git is not installed. Please install Git before running this Makefile.) | ||
endif | ||
|
||
# Targets | ||
.PHONY: all clone install clean quantized_model_dir append_to_configs | ||
|
||
all: clone install quantized_model_dir append_to_configs | ||
|
||
clone: | ||
mkdir -p $(LLAMA_DIR) | ||
git clone $(REPO_URL) $(LLAMA_DIR) | ||
|
||
install: | ||
cd $(LLAMA_DIR) && \ | ||
$(PIP) install -r $(REQUIREMENTS_FILE) | ||
|
||
quantized_model_dir: | ||
mkdir -p src/quantized_model | ||
|
||
append_to_configs: | ||
echo "py_cmd = $(PYTHON)" >> configs/config.ini | ||
|
||
clean: | ||
rm -rf $(LLAMA_DIR) |
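Both build paths append a `py_cmd = ...` entry to `configs/config.ini`. Here is a quick illustrative snippet (not repository code) showing how ConfigParser reads that value back; it relies on the appended line landing inside the existing `[Settings]` section at the end of the file:

```python
# Illustrative: read the py_cmd value appended by make/CMake from configs/config.ini.
from configparser import ConfigParser

config = ConfigParser()
config.read("./configs/config.ini")
# fallback is returned if the build step has not appended py_cmd yet.
print(config.get("Settings", "py_cmd", fallback="python"))
```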
````diff
@@ -1,18 +1,19 @@
 ## LLMinator: Run & Test LLMs locally
-#### Gradio based tool with integrated chatbot to locally run & test LLMs directly from HuggingFace.
-
-An easy-to-use tool made with Gradio, LangChain, and Torch.
-
-![image](https://github.com/Aesthisia/LLMinator/assets/91900622/54cc0b3f-c5a8-4470-bcc5-a22e5fd24707)
+#### Gradio based tool with integrated chatbot to locally run & test LLMs directly from HuggingFace.
+
+An easy-to-use tool made with Gradio, LangChain, and Torch.
+
+![LLMinator chat tab](https://github.com/Aesthisia/LLMinator/assets/89995648/0c7fd00f-610b-4ad1-8736-1f0cb7d212de)
+![LLMinator models tab](https://github.com/Aesthisia/LLMinator/assets/89995648/44c03281-fb76-40c6-b1d3-2e395562ae16)

 ### ⚡ Features

-- Context-aware Chatbot.
-- Inbuilt code syntax highlighting.
+- Context-aware Chatbot.
+- Inbuilt code syntax highlighting.
 - Load any LLM repo directly from HuggingFace.
-- Supports both CPU & Cuda modes.
+- Supports both CPU & CUDA modes.
+- Enable LLM inference with [llama.cpp](https://github.com/ggerganov/llama.cpp) using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)

 ## 🚀 How to use

@@ -21,17 +22,69 @@ To use LLMinator, follow these simple steps:
 - Clone the LLMinator repository from GitHub.
 - Navigate to the directory containing the cloned repository.
 - Install the required dependencies by running `pip install -r requirements.txt`.
+- Build LLMinator with llama.cpp :
+
+  - Using `make`:
+
+    - On Linux or MacOS:
+
+      ```bash
+      make
+      ```
+
+    - On Windows:
+
+      1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
+      2. Extract `w64devkit` on your pc.
+      3. Run `w64devkit.exe`.
+      4. Use the `cd` command to reach the `LLMinator` folder.
+      5. From here you can run:
+         ```bash
+         make
+         ```
+
+  - Using `CMake`:
+    ```bash
+    mkdir build
+    cd build
+    cmake ..
+    ```
+
 - Run the LLMinator tool using the command `python webui.py`.
 - Access the web interface by opening the provided URL in your browser.
 - Start interacting with the chatbot and experimenting with LLMs!

-### Command line arguments
+### Command line arguments

+| Argument Command | Default | Description |
+| ---------------- | --------- | --------------------------------------------------------------------------- |
+| --host | 127.0.0.1 | Host or IP address on which the server will listen for incoming connections |
+| --port | 7860 | Launch gradio with given server port |
+| --share | False | This generates a public shareable link that you can send to anybody |
+
+## Installation and Development Tips
+
+**Python Version:**
+
+- **Compatible Versions:** This project is compatible with Python versions 3.8+ to 3.11. Ensure you have one of these versions installed on your system. You can check your Python version by running `python --version` or `python3 --version` in your terminal.
+
+**Cmake and C Compiler:**
+
+- **Cmake Dependency:** If you plan to build the project using Cmake, make sure you have Cmake installed.
+- **C Compiler:** Additionally, you'll need a C compiler such as GCC. These are typically included with most Linux distributions. You can check this by running `gcc --version` in your terminal. Installation instructions for your specific operating system can be found online.
+**Visual Studio Code:**
+- **Visual Studio Installer:** If you're using Visual Studio Code for development, you'll need the C++ development workload installed. You can achieve this through the [Visual Studio Installer](https://visualstudio.microsoft.com/vs/features/cplusplus/)
+**GPU Acceleration (CUDA):**
+- **CUDA Installation:** To leverage GPU acceleration, you'll need CUDA installed on your system. Download instructions are available on the [NVIDIA website](https://developer.nvidia.com/cuda-toolkit).
+- **Torch Compatibility:** After installing CUDA, confirm CUDA availability with `torch.cuda.is_available()`. When using a GPU, ensure you follow the project's specific `llama-cpp-python` installation configuration for CUDA support.
+## Reporting Issues:
-| Argument Command | Default | Description |
-| ---------- | ---------- | ---------- |
-| --host | 127.0.0.1 | Host or IP address on which the server will listen for incoming connections |
-| --port | 7860 | Launch gradio with given server port |
-| --share | False | This generates a public shareable link that you can send to anybody |
+If you encounter any errors or issues, feel free to file a detailed report in the project's repository. We're always happy to help! When reporting an issue, please provide as much information as possible, including the error message, logs, the steps you took, and your system configuration. This makes it easier for us to diagnose and fix the problem quickly.

 ## 🤝 Contributions

@@ -42,4 +95,4 @@ We welcome contributions from the community to enhance LLMinator further. If you
 - Test your changes thoroughly.
 - Submit a pull request, providing a clear description of the changes you've made.
-Reach out to us: [email protected]
+Reach out to us: [email protected]
````
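As the Torch compatibility note above suggests, a quick illustrative check (not repository code) that CUDA is visible to Torch before choosing the GPU execution provider:

```python
# Illustrative CUDA availability check referenced in the README's GPU section.
import torch

if torch.cuda.is_available():
    print("CUDA available:", torch.cuda.get_device_name(0))
else:
    print("CUDA not available, falling back to CPU")
```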
@@ -0,0 +1,2 @@
@@ -1,4 +1,3 @@

```ini
[Settings]
execution_provider =
repo_id =
```
This file was deleted.
```diff
@@ -1,4 +1,5 @@
-gradio==4.24.0
+gradio==4.27.0
+huggingface_hub==0.21.1
 langchain==0.1.14
-torch==2.2.1
-transformers==4.39.1
+torch==2.1.2
+llama-cpp-python==0.1.9
```
@@ -0,0 +1,41 @@

```python
import subprocess, os
from huggingface_hub import snapshot_download
from configparser import ConfigParser

config_path = "./configs/config.ini"

def get_py_cmd():
    config = ConfigParser()
    config.read(config_path)
    py_cmd = config.get('Settings', 'py_cmd')
    if "python3" in py_cmd:
        return 'python3'
    else:
        return 'python'

def quantize_model(repo_id):
    original_models_path = "./src/original_model/"
    quantized_path = "./src/quantized_model/"

    repo_id_parts = repo_id.split("/")
    model_folder = f"model--{'--'.join(repo_id_parts)}"
    model_path = original_models_path + model_folder

    outfile = quantized_path + repo_id.replace("/", "__") + ".gguf"

    if os.path.isfile(outfile):
        return outfile

    snapshot_download(repo_id=repo_id, local_dir=model_path, local_dir_use_symlinks=True)

    command = [
        get_py_cmd(),
        './src/llama_cpp/convert-hf-to-gguf.py',
        model_path,
        '--outtype', 'f16',
        '--outfile', outfile
    ]

    subprocess.run(command, check=True)

    return outfile
```
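A hedged usage sketch for the helper above; the Hugging Face repo id is a hypothetical example, and the function returns the path of the GGUF file it wrote (or found already cached) under `src/quantized_model/`:

```python
# Hypothetical example call, assuming quantize_model from the module above is in scope.
gguf_path = quantize_model("microsoft/phi-2")  # any small HF model repo id would do
print("GGUF model written to:", gguf_path)
```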