Enable LLM inference with llama.cpp and llama-cpp-python (#33)
* Add models tab UI for model conversion

* Add changes to models tab UI

* Add changes to models tab UI

* Feature/llamacpp (#22)

* Initial download snapshot & convert to GGUF using llama.cpp 🐑

* Chat 💬 completion with llama.cpp

* added llama.cpp requirement

* model conversion

* HF snapshot download fix

* Implement CMake build support and enhance text generation using the GGUF model.

* Implement dynamic model path and make quantized_model directory

* Add py_cmd to configs using Makefile

* Add py_cmd to configs with cmake, dynamic python command for conversion

---------

Co-authored-by: parveen kumar <[email protected]>
Co-authored-by: Subhanshu0027 <[email protected]>

* Feat: download and convert, select model from models tab (#24)

* Feat: download and convert, select model from models tab

* Refactor: remove unused line

* Remove Converted gguf Models & Enhance UI in Models Tab (#26)

* Add feature to remove converted gguf models & UI changes in models tab

* Add remove model functionality to core.py

* Optimize code formatting

* Update README.md with new features

* Select model from chat tab (#25)

* feat: Add support for selecting execution provider, CPU or GPU with CUDA (#29)

* refactor: Remove all remnants of transformers inference logic and associated code, fix removeModel (#30)

* docs: Add installation and development tips, update (#32)

* fix: dropdown for initially empty saved_gguf_models_list (#34)

* fix: Model list not updating after download snapshot, remove model (#35)

* fix: Model list not updating after download snapshot

* fix: remove model

---------

Co-authored-by: Subhanshu0027 <[email protected]>
Co-authored-by: Subhanshu0027 <[email protected]>
Co-authored-by: Juggernaut <[email protected]>
4 people authored May 8, 2024
1 parent b1b7053 commit 01417f3
Showing 11 changed files with 399 additions and 161 deletions.
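
As context for the diff below, here is a minimal, hedged sketch of the kind of inference this commit enables via llama-cpp-python. It assumes a model already converted to GGUF under `src/quantized_model/` (the path follows the default repo id used by the app) and a reasonably recent `llama-cpp-python`; it is illustrative only, not the actual `webui.py` wiring.

```python
# Illustrative only: load a converted GGUF model and run a plain completion.
from llama_cpp import Llama

llm = Llama(
    model_path="src/quantized_model/stabilityai__stable-code-instruct-3b.gguf",
    n_ctx=2048,  # context window size
)

output = llm("Write a Python function that reverses a string.", max_tokens=128)
print(output["choices"][0]["text"])
```
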
8 changes: 8 additions & 0 deletions .gitignore
@@ -4,5 +4,13 @@ models/.locks
models/tmp*
configs/config.ini

src/original_model/*
src/quantized_model/*

src/llama_cpp

#ignore build folder(for cmake)
build

#compiled files
*.pyc
42 changes: 42 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,42 @@
# Minimum required CMake version
cmake_minimum_required(VERSION 3.15)

# Project name
project(llama_cpp)

# Git repository location
set(REPO_URL "https://github.com/ggerganov/llama.cpp")

# Requirements file
set(REQUIREMENTS_FILE "requirements.txt")

# Llama directory
set(LLAMA_DIR "${PROJECT_SOURCE_DIR}/src/llama_cpp")

# Check for Python and Git using CMake's FIND_PACKAGE
find_package(PythonLibs REQUIRED)
find_package(Git REQUIRED)

# Download and clone the llama.cpp repository
execute_process(
    COMMAND git clone ${REPO_URL} ${LLAMA_DIR}
    RESULT_VARIABLE git_result
)

# Error handling for Git clone
if(NOT ${git_result} EQUAL 0)
    message(FATAL_ERROR "Failed to clone llama.cpp repository")
endif()

# Install Python requirements
execute_process(
    COMMAND pip install -r "${LLAMA_DIR}/${REQUIREMENTS_FILE}"
)

file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/src/quantized_model")

find_program(PYTHON NAMES python python3)

if(PYTHON)
    file(APPEND "${PROJECT_SOURCE_DIR}/configs/config.ini" "py_cmd = ${PYTHON}")
endif()
43 changes: 43 additions & 0 deletions Makefile
@@ -0,0 +1,43 @@
# Makefile to clone llama.cpp repository and install requirements

# Variables
REPO_URL := https://github.com/ggerganov/llama.cpp
REQUIREMENTS_FILE := requirements.txt
LLAMA_DIR := src/llama_cpp

# Determine pip command
PIP := $(shell command -v pip3 2>/dev/null || command -v pip)

# Check if python and git are installed
PYTHON := $(shell command -v python 2>/dev/null || command -v python3 2>/dev/null)
GIT := $(shell command -v git)

ifeq ($(PYTHON),)
$(error Python is not installed. Please install Python before running this Makefile.)
endif

ifeq ($(GIT),)
$(error Git is not installed. Please install Git before running this Makefile.)
endif

# Targets
.PHONY: all clone install clean quantized_model_dir append_to_configs

all: clone install quantized_model_dir append_to_configs

clone:
	mkdir -p $(LLAMA_DIR)
	git clone $(REPO_URL) $(LLAMA_DIR)

install:
	cd $(LLAMA_DIR) && \
	$(PIP) install -r $(REQUIREMENTS_FILE)

quantized_model_dir:
	mkdir -p src/quantized_model

append_to_configs:
	echo "py_cmd = $(PYTHON)" >> configs/config.ini

clean:
	rm -rf $(LLAMA_DIR)
81 changes: 67 additions & 14 deletions README.md
@@ -1,18 +1,19 @@
## LLMinator: Run & Test LLMs locally
#### Gradio based tool with integrated chatbot to locally run & test LLMs directly from HuggingFace.

An easy-to-use tool made with Gradio, LangChain, and Torch.

![image](https://github.com/Aesthisia/LLMinator/assets/91900622/54cc0b3f-c5a8-4470-bcc5-a22e5fd24707)
#### Gradio based tool with integrated chatbot to locally run & test LLMs directly from HuggingFace.

An easy-to-use tool made with Gradio, LangChain, and Torch.

![LLMinator chat tab](https://github.com/Aesthisia/LLMinator/assets/89995648/0c7fd00f-610b-4ad1-8736-1f0cb7d212de)
![LLMinator models tab](https://github.com/Aesthisia/LLMinator/assets/89995648/44c03281-fb76-40c6-b1d3-2e395562ae16)

### ⚡ Features

- Context-aware Chatbot.
- Inbuilt code syntax highlighting.
- Context-aware Chatbot.
- Inbuilt code syntax highlighting.
- Load any LLM repo directly from HuggingFace.
- Supports both CPU & Cuda modes.
- Supports both CPU & CUDA modes.
- Enable LLM inference with [llama.cpp](https://github.com/ggerganov/llama.cpp) using [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)

## 🚀 How to use

@@ -21,17 +22,69 @@ To use LLMinator, follow these simple steps:
- Clone the LLMinator repository from GitHub.
- Navigate to the directory containing the cloned repository.
- Install the required dependencies by running `pip install -r requirements.txt`.
- Build LLMinator with llama.cpp:

  - Using `make`:

    - On Linux or MacOS:

      ```bash
      make
      ```

    - On Windows:

      1. Download the latest Fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
      2. Extract `w64devkit` on your PC.
      3. Run `w64devkit.exe`.
      4. Use the `cd` command to reach the `LLMinator` folder.
      5. From here you can run:
         ```bash
         make
         ```

  - Using `CMake`:
    ```bash
    mkdir build
    cd build
    cmake ..
    ```

- Run the LLMinator tool using the command `python webui.py`.
- Access the web interface by opening the provided URL in your browser.
- Start interacting with the chatbot and experimenting with LLMs!

### Command line arguments
### Command line arguments

| Argument Command | Default | Description |
| ---------------- | --------- | --------------------------------------------------------------------------- |
| --host | 127.0.0.1 | Host or IP address on which the server will listen for incoming connections |
| --port | 7860 | Launch gradio with given server port |
| --share | False | This generates a public shareable link that you can send to anybody |
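
For illustration, a hedged sketch of how these flags are typically parsed and handed to Gradio's `launch()`; the argument names and defaults match the table above, but the actual `webui.py` implementation may differ.

```python
# Hedged sketch of the CLI flags above; webui.py's real parsing may differ.
import argparse

parser = argparse.ArgumentParser(description="LLMinator")
parser.add_argument("--host", default="127.0.0.1", help="Host or IP address to listen on")
parser.add_argument("--port", type=int, default=7860, help="Gradio server port")
parser.add_argument("--share", action="store_true", help="Generate a public shareable link")
args = parser.parse_args()

# demo.launch(server_name=args.host, server_port=args.port, share=args.share)
```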

## Installation and Development Tips

**Python Version:**

- **Compatible Versions:** This project is compatible with Python versions 3.8+ to 3.11. Ensure you have one of these versions installed on your system. You can check your Python version by running `python --version` or `python3 --version` in your terminal.

**Cmake and C Compiler:**

- **Cmake Dependency:** If you plan to build the project using Cmake, make sure you have Cmake installed.
- **C Compiler:** Additionally, you'll need a C compiler such as GCC. These are typically included with most Linux distributions. You can check this by running `gcc --version` in your terminal. Installation instructions for your specific operating system can be found online.

**Visual Studio Code:**

- **Visual Studio Installer:** If you're using Visual Studio Code for development, you'll need the C++ development workload installed. You can achieve this through the [Visual Studio Installer](https://visualstudio.microsoft.com/vs/features/cplusplus/)

**GPU Acceleration (CUDA):**

- **CUDA Installation:** To leverage GPU acceleration, you'll need CUDA installed on your system. Download instructions are available on the [NVIDIA website](https://developer.nvidia.com/cuda-toolkit).
- **Torch Compatibility:** After installing CUDA, confirm CUDA availability with `torch.cuda.is_available()`. When using a GPU, ensure you follow the project's specific `llama-cpp-python` installation configuration for CUDA support.
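
A small sketch of the CUDA check mentioned above. `torch` is already a project dependency; the `"cuda"`/`"cpu"` strings are illustrative and not necessarily the exact values the app stores in `execution_provider`.

```python
# Confirm CUDA availability before selecting the GPU execution provider.
import torch

if torch.cuda.is_available():
    print("CUDA available:", torch.cuda.get_device_name(0))
    execution_provider = "cuda"  # illustrative value
else:
    print("CUDA not available, falling back to CPU.")
    execution_provider = "cpu"  # illustrative value
```
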
## Reporting Issues:
| Argument Command | Default | Description |
| ---------- | ---------- | ---------- |
| --host | 127.0.0.1 | Host or IP address on which the server will listen for incoming connections |
| --port | 7860 | Launch gradio with given server port |
| --share | False | This generates a public shareable link that you can send to anybody |
If you encounter any errors or issues, feel free to file a detailed report in the project's repository. We're always happy to help! When reporting an issue, please provide as much information as possible, including the error message, logs, the steps you took, and your system configuration. This makes it easier for us to diagnose and fix the problem quickly.
## 🤝 Contributions
@@ -42,4 +95,4 @@ We welcome contributions from the community to enhance LLMinator further. If you
- Test your changes thoroughly.
- Submit a pull request, providing a clear description of the changes you've made.
Reach out to us: [email protected]
Reach out to us: [email protected]
2 changes: 2 additions & 0 deletions __init__.py
@@ -0,0 +1,2 @@


1 change: 0 additions & 1 deletion configs/config.ini
@@ -1,4 +1,3 @@
[Settings]
execution_provider =
repo_id =

44 changes: 25 additions & 19 deletions core.py
@@ -1,30 +1,36 @@
import os, shutil
from configparser import ConfigParser
import gradio as gr

default_repo_id = "stabilityai/stable-code-instruct-3b"
config_path = "configs/config.ini"
default_repo_id_parts = default_repo_id.split("/")
default_model_folder = f"models--{'--'.join(default_repo_id_parts)}"
cache_gguf_dir = os.path.join(os.getcwd(), "src/quantized_model")
cache_original_dir = os.path.join(os.getcwd(), "src/original_model")

def format_model_name(directory_name):
    parts = directory_name.split("--")
    return "/".join(parts[1:])
def format_gguf_model_name(file_name):
    parts = file_name.replace('.gguf', '').split("__")
    return "/".join(parts)

def list_download_models(cache_dir):
    contents = os.listdir(cache_dir)
    directories = [format_model_name(item) for item in contents if os.path.isdir(os.path.join(cache_dir, item)) and item.startswith("models")]
    return directories
def list_converted_gguf_models(cache_gguf_dir):
    contents = os.listdir(cache_gguf_dir)
    model_files = [format_gguf_model_name(item) for item in contents]
    return model_files

def remove_dir(path):
    try:
        for model in os.listdir(path):
            if model != default_model_folder:
                model_path = os.path.join(path, model)
                if os.path.isdir(model_path):
                    shutil.rmtree(model_path)
        print("successfully removed cached models!")
    except OSError as e:
        print(f"Error: {e.strerror}")
def removeModelFromCache(model_name):
    config = ConfigParser()
    config.read(config_path)
    repo_id = config.get('Settings', 'repo_id')
    if model_name == repo_id:
        raise gr.Error("Can not delete default model")
    else:
        gguf_model_name = model_name.replace("/", "__") + ".gguf"
        original_model_parts = model_name.split("/")
        original_model_name = f"model--{'--'.join(original_model_parts)}"
        try:
            os.remove(os.path.join(cache_gguf_dir, gguf_model_name))
            shutil.rmtree(os.path.join(cache_original_dir, original_model_name))
        except FileNotFoundError:
            raise gr.Error("Model not found in cache.")

def read_config():
    config = ConfigParser()
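
For reference, a hedged usage sketch of the new helpers in `core.py`; it assumes the script is run from the repository root, and the model name passed to `removeModelFromCache` is purely illustrative.

```python
# Hedged usage sketch of the new core.py helpers (run from the repo root).
from core import cache_gguf_dir, list_converted_gguf_models, removeModelFromCache

# List converted GGUF models, e.g. ["stabilityai/stable-code-instruct-3b"].
print(list_converted_gguf_models(cache_gguf_dir))

# Remove a cached model; raises gr.Error if it is the configured default
# model or if it is not found in the cache.
removeModelFromCache("TinyLlama/TinyLlama-1.1B-Chat-v1.0")  # illustrative repo id
```
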
1 change: 0 additions & 1 deletion models/models.txt

This file was deleted.

7 changes: 4 additions & 3 deletions requirements.txt
@@ -1,4 +1,5 @@
gradio==4.24.0
gradio==4.27.0
huggingface_hub==0.21.1
langchain==0.1.14
torch==2.2.1
transformers==4.39.1
torch==2.1.2
llama-cpp-python==0.1.9
41 changes: 41 additions & 0 deletions src/quantize.py
@@ -0,0 +1,41 @@
import subprocess, os
from huggingface_hub import snapshot_download
from configparser import ConfigParser

config_path = "./configs/config.ini"

def get_py_cmd():
    config = ConfigParser()
    config.read(config_path)
    py_cmd = config.get('Settings', 'py_cmd')
    if "python3" in py_cmd:
        return 'python3'
    else:
        return 'python'

def quantize_model(repo_id):
    original_models_path = "./src/original_model/"
    quantized_path = "./src/quantized_model/"

    repo_id_parts = repo_id.split("/")
    model_folder = f"model--{'--'.join(repo_id_parts)}"
    model_path = original_models_path + model_folder

    outfile = quantized_path + repo_id.replace("/", "__") + ".gguf"

    if os.path.isfile(outfile):
        return outfile

    snapshot_download(repo_id=repo_id, local_dir=model_path, local_dir_use_symlinks=True)

    command = [
        get_py_cmd(),
        './src/llama_cpp/convert-hf-to-gguf.py',
        model_path,
        '--outtype', 'f16',
        '--outfile', outfile
    ]

    subprocess.run(command, check=True)

    return outfile
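
A hedged usage sketch of `quantize_model`: it assumes llama.cpp has been cloned into `src/llama_cpp` by the Makefile/CMake step, `py_cmd` is present in `configs/config.ini`, and `src` is importable as a package (the app may wire this up differently).

```python
# Hedged sketch: download a HF snapshot and convert it to GGUF (f16) via
# llama.cpp's convert-hf-to-gguf.py; returns the cached path if it already exists.
from src.quantize import quantize_model

gguf_path = quantize_model("stabilityai/stable-code-instruct-3b")
print("GGUF written to:", gguf_path)
# e.g. ./src/quantized_model/stabilityai__stable-code-instruct-3b.gguf
```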